In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mybiotools as mbt
import pr_peaks

# 2018-02-26 Jumps into the void
I am now sure that the ratio of reads between the high and the low peaks behaves non-monotonically as a function of the concentration of hormone given. There are two possible explanations for this effect:

1. This is a result of some experimental bias or experimental artefact.
2. This is a real effect, and can be explained based on realistic assumptions about how the diffusion of TFs works inside cell nuclei.

In this notebook I want to follow line number 2, and try to pursue an explanation for the observed effect.

One effect that I systematically neglected so far is the fact that in a realistic situation the searchers (proteins that search the DNA) can also be searching in the 3D space of the cell nuclei. This represents a special state of the model, because *it does not have a limitation on the number of searchers that can be in it*. To model this state, I can add another site to the system, update the transition probabilities correspondingly, and perform the multi-agent simulations.

Technically speaking, this requires changing the code base, because so far I treated the occupancy vector as a boolean vector, which implicitly takes into account the fact that no site 
can be occupied by two particles at the same time. Now I have to add another site which can be 
occupied by any number of particles.

Here I want to do a quick test of what are the consequences of adding this void state. I'll re-code the simulation routines to allow for this extra state.

In [None]:
class Searcher :
    """One particle of the multi-agent simulation.

    Attributes
    ----------
    index : identifier assigned to the searcher when it is created
    site : index of the state the searcher currently sits on
    td : time value stored alongside the site; the simulation loop compares
         it against the step counter to decide whether the searcher may jump
    """
    def __init__(self,index,site,td) :
        self.index, self.site, self.td = index, site, td

Here I start recoding. The new "void" state will be treated as the last state of the `omega_t` vector. This has the special property of having any number of searchers occupying it. At initialization, a certain number of sites are occupied, and the void state is occupied by a certain fraction `phi` of the total number of searchers. In the following function, we know how many searchers are in the network states by summing over `omega_t` up to the $n-1$-th site.

In [None]:
def init_searchers(omega_t,site_taus,deterministic=False) :
    """Create one `Searcher` per particle recorded in `omega_t`.

    Parameters
    ----------
    omega_t : integer occupancy vector of length n+1; entries 0..n-1 are the
        network sites and the last entry is the number of searchers in the
        void state
    site_taus : vector of length n+1 with the dwell time of every state
        (the last entry is the dwell time of the void state)
    deterministic : if True the initial `td` of a searcher is exactly the
        dwell time of its site, otherwise it is drawn from an exponential
        distribution with that scale

    Returns
    -------
    list of `Searcher`, network searchers first (in order of increasing site
    index) followed by the searchers in the void; indices are consecutive
    starting from 0, and the length always equals `omega_t.sum()`
    """
    # the void state is the last entry; everything before it is the network
    n = omega_t.size-1
    nvoid = int(omega_t[n])
    # one entry per searcher: repeating each site by its occupancy keeps the
    # number of searchers equal to omega_t.sum() even if a network site
    # holds more than one particle
    network_sites = np.repeat(np.arange(n),omega_t[:n])
    sites = [int(site) for site in network_sites] + [n]*nvoid
    searchers = []
    for index,site in enumerate(sites) :
        tau = site_taus[site]
        if deterministic :
            td = tau
        else :
            td = np.random.exponential(scale=tau)
        searchers.append(Searcher(index,site,td))
    return searchers

In all the code, $n$ is the number of states in the network. The total number of states will always be $n+1$. Now the `omega_t` vector is not made of boolean values but of integers, because we want to know how many searchers are on the network and how many are in the void state.

In [None]:
def init_omega_t(n,mu,phi,seed=None) :
    """Build the initial occupancy vector for `mu` searchers.

    The vector has n+1 integer entries: the first n are the network sites
    (0 = empty, 1 = occupied) and the last one counts the searchers in the
    void state. `int(mu*phi)` searchers start in the void, the remaining
    ones are placed on distinct, randomly chosen network sites.

    If `seed` is given, numpy's global random generator is re-seeded with it
    before the sites are drawn.
    """
    if seed is not None :
        np.random.seed(seed)
    # split the searchers between void and network
    nvoid = int(mu*phi)
    n_on_network = mu-nvoid
    # draw the occupied network sites without repetition
    occupied = np.random.choice(n,n_on_network,replace=False)
    omega_t = np.zeros(n+1,dtype=np.int32)
    omega_t[occupied] = 1
    omega_t[n] = nvoid
    return omega_t

Now the main simulation loop is modified accordingly. I init a `buff` boolean vector with dimension $n+1$. The last element is always `True`, because all the searchers can go to the void state at any moment. The other elements play the role that the boolean `omega_t` vector played before, but as an availability mask: `True` if the site is free and `False` if it is occupied.

In [None]:
def run_chair_simulation(nsteps,omega_t_initial,T,site_taus,
                         seed=None,teq=0,tsample=1,deterministic=False) :
    """Run the multi-agent jumping simulation with a void state.

    Parameters
    ----------
    nsteps : number of time steps; the loop runs over steps 1..nsteps
    omega_t_initial : integer occupancy vector of length n+1 (last entry is
        the void state); it is copied, not modified
    T : (n+1)x(n+1) matrix; row i holds the weights of the jumps out of
        state i. Weights towards occupied network sites are masked out and
        the row is re-normalized before each jump
    site_taus : vector of length n+1 with the dwell time of every state
    seed : if not None, numpy's global random generator is re-seeded with it
    teq : samples are recorded only for steps greater than `teq`
    tsample : a sample is recorded every `tsample` steps after `teq`
    deterministic : if True dwell times are exactly `site_taus`, otherwise
        they are drawn from an exponential distribution with that scale

    Returns
    -------
    omega_t : occupancy vector (length n+1) at the end of the run
    J : (n+1)x(n+1) integer matrix, J[i,j] counts the jumps from i to j
    samples : (nsamples,n) integer matrix with the occupancy of the network
        sites at every sampled step (the void entry is not stored)

    Raises
    ------
    ValueError : if a searcher that has to jump has no allowed destination
        (all the weights of its row vanish once occupied sites are masked)
    """
    # init the random number generator if it was passed
    if seed is not None :
        np.random.seed(seed)
    # make an internal copy of the initial omega_t matrix
    omega_t = omega_t_initial.copy()
    # the number of network sites follows from the size of omega_t
    n = omega_t.size-1
    # init jumping matrix
    J = np.zeros((n+1,n+1),dtype=np.int32)
    # init searchers
    searchers = init_searchers(omega_t,site_taus,deterministic=deterministic)
    # init sampling matrix: floor division keeps the count an integer under
    # both Python 2 and Python 3; no rows are allocated if teq >= nsteps
    nsamples = max((nsteps-teq)//tsample,0)
    samples = np.zeros((nsamples,n),dtype=np.int32)
    i_sample = 0
    # init passage buffer: True marks a state that can receive a searcher.
    # The last element (void) is never touched again, so it stays True
    buff = np.ones(n+1,dtype=bool)
    # cycle on time
    for step in range(1,nsteps+1) :
        # cycle on the searchers
        for searcher in searchers :
            # if the searcher has to stay longer on the site, skip it
            if step<searcher.td :
                continue
            # network sites are available only when empty
            buff[:n] = omega_t[:n]==0
            Tstar = T[searcher.site,:] * buff
            total = Tstar.sum()
            if not total>0 :
                raise ValueError('searcher %d on site %d has no allowed '
                                 'transition at step %d'
                                 %(searcher.index,searcher.site,step))
            # out-of-place division so that an integer-valued T also works
            Tstar = Tstar/total
            # now get the next site
            next_site = np.random.choice(n+1,p=Tstar)
            # update omega matrix
            omega_t[searcher.site] -= 1
            omega_t[next_site] += 1
            # update jumping matrix
            J[searcher.site,next_site] += 1
            # update searcher
            searcher.site = next_site
            if deterministic :
                next_td = site_taus[searcher.site]
            else :
                next_td = np.random.exponential(scale=site_taus[searcher.site])
            searcher.td = step + next_td
        # update samples: only the network sites are saved, because the
        # number of searchers in the void state follows by difference
        if step>teq and (step-teq)%tsample==0 :
            samples[i_sample,:] = omega_t[:n]
            i_sample += 1
    return omega_t, J, samples

In [None]:
class JumpingModel :
    """Transition matrix and dwell times together with the results of the
    simulations run on them.

    Results are stored in dictionaries keyed by the `mu` passed to `run`:
    `omega_t` (final occupancy), `J` (jump counts), `samples` (sampled
    network occupancies) and `theta` (normalized per-site occupancy).
    """
    def __init__ (self,T,site_taus) :
        self.T, self.site_taus = T, site_taus
        # one independent result dictionary per quantity, filled by run()
        self.omega_t, self.J, self.samples, self.theta = {}, {}, {}, {}
    def run(self,nsteps,mu,omega_t_initial,
           seed=None,teq=0,tsample=1,deterministic=False) :
        """Run `run_chair_simulation` on this model and store its outputs
        under the key `mu`; all other arguments are forwarded unchanged."""
        final_omega,jumps,sampled = run_chair_simulation(
                nsteps,omega_t_initial,self.T,self.site_taus,
                seed=seed,teq=teq,tsample=tsample,deterministic=deterministic)
        self.omega_t[mu] = final_omega
        self.J[mu] = jumps
        self.samples[mu] = sampled
        # per-site occupancy summed over the samples, divided by the grand total
        per_site = self.samples[mu].sum(axis=0)
        grand_total = float(self.samples[mu].sum())
        self.theta[mu] = per_site/grand_total

Now the simulation code is ready. Below I test the results of the simulations in a simple network topology: the case in which the void state is the necessary transition hub between any two states. That is: if a searcher is on a network state, the only jump it can make is to the void state, and from the void state it can make a transition to any other state.

In [None]:
# general simulation parameters
nsteps = 10000
teq = 0
tsample = 1
deterministic=True
mus = np.arange(2,50,10)
phi = 0.50

# prepare site_taus
n = 100
nH = 10
np.random.seed(6589)
# Hsites: nH distinct sites drawn at random; Lsites: all the other network
# sites, as a sorted list of plain integers
Hsites = np.random.choice(n,size=nH,replace=False)
Lsites = np.setdiff1d(np.arange(n),Hsites).tolist()
# dwell times: 2.0 everywhere (void state included), 20.0 on the H sites
site_taus = 2.0*np.ones(n+1)
site_taus[Hsites] = 20.0

# transition matrix with the void state (index n) as the only hub: every
# network site can only jump to the void, and the void has the same weight
# towards every network site. All other entries, diagonal included, stay 0
Tuni = np.zeros((n+1,n+1))
Tuni[:n,n] = 1.0
Tuni[n,:n] = 1.0
# NOTE(review): presumably rescales every row to sum to one — confirm in mbt
Tuni = mbt.row_normalize_matrix(Tuni)
uni = JumpingModel(Tuni,site_taus)

In [None]:
# simulations: one run per value of mu, each starting from a freshly drawn
# initial occupancy. The sampling and dwell-time options come from the
# configuration cell instead of being repeated here as literals, so that
# editing the configuration actually changes the runs
for mu in mus :
    omega_t_initial = init_omega_t(n,mu,phi)
    mbt.log_message('Uni','mu = %d'%(mu))
    uni.run(nsteps,mu,omega_t_initial,
           seed=None,teq=teq,tsample=tsample,deterministic=deterministic)

In [None]:
# aftermath: hand the simulated model and the two groups of sites to
# pr_peaks.H_to_L.
# NOTE(review): the later plot of `uni.H_to_L` suggests this call stores its
# result as an attribute of `uni` — confirm against pr_peaks.
pr_peaks.H_to_L(uni,Hsites,Lsites)

In [None]:
# the samples only store the network sites, so summing a sample gives the
# number of searchers ON the network. The number in the void is obtained by
# difference from the total, which is conserved during the run and can be
# read from the final occupancy vector (void entry included)
mu_shown = 12
n_total = uni.omega_t[mu_shown].sum()
n_in_void = n_total - uni.samples[mu_shown].sum(axis=1)
plt.plot(n_in_void)
plt.xlabel("Time step")
plt.ylabel("Number of searcher in void")
plt.show()

In [None]:
# H to L ratio as a function of the number of searchers, drawn through the
# explicit figure/axes interface
fig, ax = plt.subplots()
ax.plot(mus,uni.H_to_L,'o--')
ax.set_xlabel(r'$\mu$')
ax.set_ylabel("H to L ratio")
plt.show()

The result of this study is that, at least for the simple topology chosen here, there is no qualitative difference between the previous case and this one. It is effectively like having a system in which there is a variable number of searchers in the network.