In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mybiotools as mbt
import pr_peaks

# 2018-02-20 Bare chair model

Here I want to identify the essential features needed to reproduce the non-monotonic behaviour that I observed in the ratio between H and L sites in the ChIP-seq experiments. Clearly there were too many parameters in the previous version. Here I consider only one site competing with some other sites, and look at whether there are any interesting effects that one can speculate upon.

In [None]:
# Constants of the system.
N = 500                                    # number of equivalent systems
n = 100                                    # number of sites in each system

# The simulations below are stochastic, so fix the seed of NumPy's global
# generator: a "Restart & Run All" then reproduces the same numbers.
# NOTE(review): this assumes pr_peaks draws from the global np.random state
# (the profiling section reports time spent in np.random.choice) -- confirm.
SEED = 20180220
np.random.seed(SEED)

In [None]:
# Per-site tau values: every site shares the same baseline value, except for
# one distinguished site (the "H" site) whose tau is ten times larger.
# NOTE(review): tau is presumably a residence time -- confirm in pr_peaks.
Hsite = 49
site_taus = np.full(n, 2.0)
site_taus[Hsite] = 20.0

In [None]:
# Simulation settings.
nsteps = 1000                              # first argument of JumpingModel.run
sigma = None                               # forwarded unchanged to JumpingModel.run
phi = np.array([0.01,0.10,0.25,0.50,0.75])
# Turn the fractions phi into integer counts out of n sites. Round before
# casting: a bare astype() truncates, so a product that lands just below an
# integer (e.g. 100*0.29 = 28.999...) would silently lose one unit.
mus = np.rint(n*phi).astype(np.int32)
# print() with a single argument gives the same output on Python 2 and 3.
print(mus)

In [None]:
%%time
Tflat = np.ones((n,n))/n
flat = pr_peaks.JumpingModel(Tflat,site_taus)
for mu in mus :
    mbt.log_message('Flat','mu = %d'%(mu))
    omega_t_initial = pr_peaks.init_omega_t(N,n,mu)
    flat.run(nsteps,mu,sigma,omega_t_initial)

In [None]:
def H_to_L(model,Hsites,Lsites) :
    """Store on `model` the mean H-site and L-site occupancy and their ratio.

    Parameters
    ----------
    model : object
        Must expose `occupancy`, a mapping from mu to a NumPy array that can
        be indexed with `Hsites` and with `Lsites`.
    Hsites : list of int
        Indices of the H sites.
    Lsites : list of int
        Indices of the L sites.

    Returns
    -------
    None. The results are attached to `model`:
      * `model.avH[mu]`  -- mean of `occupancy[mu]` over `Hsites`
      * `model.avL[mu]`  -- mean of `occupancy[mu]` over `Lsites`
      * `model.H_to_L`   -- array of avH/avL, one entry per mu, ordered by
                            ascending mu
    """
    # sorted() builds a fresh sorted list on both Python 2 and Python 3;
    # keys() followed by .sort() only works on Python 2, where keys() is a list.
    mus = sorted(model.occupancy)
    nmus = len(mus)
    model.avH = {}
    model.avL = {}
    model.H_to_L = np.zeros(nmus)
    for i,mu in enumerate(mus) :
        occupancy = model.occupancy[mu]
        model.avH[mu] = occupancy[Hsites].mean()
        model.avL[mu] = occupancy[Lsites].mean()
        model.H_to_L[i] = model.avH[mu]/model.avL[mu]

In [None]:
# The L sites are all site indices except the H site.
# range() instead of the Python 2-only xrange() keeps the cell runnable on
# both Python 2 and Python 3.
Lsites = [i for i in range(n) if i != Hsite]

In [None]:
# Prepare the transition matrix: start from uniform weights and add one unit
# of weight, in both directions, to every H <-> L pair.
THL = np.ones((n,n))
# Index with the site itself, not with its position in Lsites: the two differ
# for every site after Hsite, so using the position would skip the last site
# and boost the (Hsite,Hsite) diagonal entry instead.
for site in Lsites :
    THL[Hsite,site] += 1
    THL[site,Hsite] += 1
# NOTE(review): row_normalize_matrix presumably rescales each row to sum to
# one -- confirm in mybiotools.
THL = mbt.row_normalize_matrix(THL)

In [None]:
%%time
HL = pr_peaks.JumpingModel(THL,site_taus)
for mu in mus :
    mbt.log_message('HL','mu = %d'%(mu))
    omega_t_initial = pr_peaks.init_omega_t(N,n,mu)
    HL.run(nsteps,mu,sigma,omega_t_initial)

In [None]:
# Attach avH, avL and the H_to_L ratio array to both models, treating Hsite
# as the only H site.
for model in (HL, flat):
    H_to_L(model, [Hsite], Lsites)

In [None]:
# H/L occupancy ratio as a function of mu for the two models.
# Each H_to_L array is ordered by the sorted keys of the model's occupancy
# dict, so use those same keys as x values to keep the points paired.
fig, ax = plt.subplots()
ax.plot(sorted(flat.occupancy),flat.H_to_L,'r',label='flat')
ax.plot(sorted(HL.occupancy),HL.H_to_L,'b',label='HL')
ax.set_xlabel(r'$\mu$')
ax.set_ylabel('H to L ratio')
# Reference line at ratio 1: H site and average L site equally occupied.
ax.axhline(y=1.0,linestyle='--',color='k')
# Label the curves so the figure can be read without looking at the code.
ax.legend()
plt.show()

### Intermezzo: profiling the code

The execution time of the code above is frustratingly long. Therefore I'd like to do the ancient thing of trying to figure out the reason for this by profiling the code.

In [None]:
import cProfile

# Profile a single run at mu = 50. Build an initial state for that same mu:
# the `omega_t_initial` left over from the loops above was created for the
# last mu of the scan, not for the value profiled here.
# NOTE(review): this run presumably overwrites the results HL stored for
# mu = 50 -- profile on a scratch JumpingModel if those must be kept.
mu_profile = 50
omega_t_profile = pr_peaks.init_omega_t(N,n,mu_profile)
cProfile.run('HL.run(nsteps,mu_profile,sigma,omega_t_profile)')

The result of this analysis is that there is a lot of time spent in the "np.random.choice" function. There would really be little or no benefit in trying to optimize calls to that function, so I leave it here and desist.

In [None]:
# Sanity check: compute the H/L ratio for one mu straight from the occupancy.
mu = 25
occupancy_mu = flat.occupancy[mu]
occupancy_mu[Hsite] / occupancy_mu[Lsites].mean()

In [None]:
# Same ratio for the current mu, read from the averages stored by H_to_L().
ratio_from_averages = flat.avH[mu] / flat.avL[mu]
ratio_from_averages

In [None]:
# Full ratio array stored by H_to_L() on the flat model: one entry per mu,
# ordered by ascending mu.
flat.H_to_L