In [None]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import mybiotools as mbt

# 2018-02-15 Chair model
I want to look at my original "chair" model in more detail. I left it hanging a little because I realized that the occupancy per searcher is not an observable in the biological experiments that interest me. Here, I want to try another approach: I want to perform simulations on a model that more closely resembles a biological experiment. The idea is that in a given ChIP-seq experiment one does not know the number of cells, the number of proteins present in the system, or the cell-to-cell variability.

## Model setup

### Basics

The model will be built like this:

- I consider $N$ equivalent systems made of $n$ sites each, which are explored by searchers. Each site has a characteristic time $\tau_j$, where $j$ is the index that runs over the sites. Whenever a searcher hits site $j$, if the site is empty it will stay on that site for a time randomly extracted from the exponential distribution with characteristic time $\tau_j$.
- I consider that the number of searchers is not the same for each system, so that there is no way of normalizing the occupancy per searcher. The number of searchers for the $i$-th system is $m_i$. The number of searchers per system will be extracted from a Gaussian distribution with mean $\mu$ and variance $\sigma^2$ (truncated to an integer and clamped between $1$ and $n$).

The occupancy at each time step is determined by the matrix $\Omega(t)$. Such matrix has the system ids as row indices and the site ids as the column indices. The values of the matrix $\Omega_{ij}(t)$ can be either zero or one, depending on whether the $j$-th site of the $i$-th system is empty or occupied at time $t$. The sum rule for the rows of the matrix therefore reads

$\sum_{j=1}^n \Omega_{ij}(t) = m_i$

The occupancy at a given time step is the observable that we are interested in. In fact, in a real biological case one can imagine that a snapshot of the system is performed at a given time, and one measures the total number of particles sitting on a given site of the system. Therefore, I will never perform time averages. Instead, I will focus my attention on the occupancy vector

$\bar{\Omega}_j (t) = \sum_{i=1}^N \Omega_{ij} (t)$

### Transition probabilities

The Markov process that governs the search is expressed mathematically by setting the transition matrix $T$. The element $T_{kl}$ is defined as the probability that at any given instant a particle on site $k$ performs a transition to site $l$. The matrix is row-normalized, so that $\sum_l T_{kl} = 1$.


## Code setup
Whenever a searcher needs to move (recall: not at every time step), it looks for the available sites on its system. It therefore gets an array of indices $\{\alpha\}$ corresponding to the free sites of the system. From these it builds the reduced transition vector $T_{k\alpha}$, where $k$ is the site the searcher currently occupies, which is normalized to its own sum. This is the transition probability used for that particular step of the simulation.

## Code setup

### Main simulation functions and data structures

The search process will be modelled using a special class defined ad hoc. The "Searcher" class contains a searcher identity, the site on which the searcher is, and a parameter defining how long the searcher will spend on that given site.

In [None]:
class Searcher(object) :
    """
    A single searcher performing the random walk on one system.

    Attributes
    ----------
    index : identifier of the searcher
    site  : index of the site on which the searcher currently sits
    td    : time parameter attached to the current site; the simulation
            loop moves the searcher once the step counter exceeds it
    """
    def __init__(self,index,site,td) :
        self.index = index
        self.site = site
        self.td = td

    def __repr__(self) :
        # readable representation, handy when inspecting lists of searchers
        return "Searcher(index=%r, site=%r, td=%r)"%(self.index,self.site,self.td)

I init a list of lists of searchers, which corresponds to the searchers that perform the random walks on each of the systems defined in the model.

In [None]:
def init_searchers(omega_t,site_taus) :
    """
    Initializes the searchers in the system. Note that this function will only
    be invoked from within the main simulation loop, so the "searchers" list that
    will be created is only visible internally, as a convenient data structure to
    keep track of what's going on, and not to the external world to see.

    Parameters
    ----------
    omega_t : 2D occupancy matrix, one row per system and one column per
        site; the nonzero entries of a row mark the occupied sites
    site_taus : per-site scale of the exponential distribution from which
        the initial `td` of each searcher is drawn

    Returns
    -------
    A list with one entry per row of `omega_t`; each entry is the list of
    `Searcher` objects of that system, one per occupied site, numbered from
    zero in order of increasing site index.
    """
    searchers = []
    for occupancy in omega_t :
        # indices of the sites occupied by the searchers of this system;
        # iterating over them directly keeps the number of searchers and the
        # list of occupied sites consistent by construction
        searcher_sites = np.where(occupancy)[0]
        searchers_system = []
        for s,site in enumerate(searcher_sites) :
            td = np.random.exponential(scale=site_taus[site])
            searchers_system.append(Searcher(s,site,td))
        searchers.append(searchers_system)
    return searchers

Below is the main simulation function of the system. It takes the `omega_t` matrix, and runs the simulation for `nsteps`.

In [None]:
def run_chair_simulation(nsteps,omega_t_initial,T,site_taus,seed=None) :
    """
    Runs the simulation for `nsteps` time steps and returns the final
    occupancy matrix.

    At each step (numbered 1..nsteps), every searcher whose `td` has been
    exceeded by the step counter jumps to one of the free sites of its own
    system. The target is drawn with probabilities given by the row of `T`
    of the current site, restricted to the free sites and normalized to its
    own sum. After the jump, `td` is set to the current step plus a time
    drawn from the exponential distribution of the new site.

    Parameters
    ----------
    nsteps : number of time steps to simulate
    omega_t_initial : 2D boolean occupancy matrix (systems x sites); it is
        copied, never modified
    T : 2D transition matrix, indexed as T[current_site,target_site]
    site_taus : per-site scale of the exponential residence time
    seed : if not None, used to seed numpy's global random number generator

    Returns
    -------
    The occupancy matrix after the last step (same shape as the input).

    Notes
    -----
    If a searcher is due to move but no free site can be reached (all sites
    occupied, or zero total transition weight towards the free sites), it
    stays where it is and tries again at the next step.
    """
    # init the random number generator if it was passed
    if seed is not None :
        np.random.seed(seed)
    # make an internal copy of the initial omega_t matrix
    omega_t = omega_t_initial.copy()
    # init searchers: one list of Searcher objects per system
    searchers = init_searchers(omega_t,site_taus)
    # cycle on time
    for step in range(1,nsteps+1) :
        # cycle on the systems
        for i,searchers_system in enumerate(searchers) :
            # cycle on the searchers
            for searcher in searchers_system :
                # if the searcher has to stay longer on the site, skip it
                if step <= searcher.td :
                    continue
                # restrict the transition probabilities to the free sites
                free_sites = np.where(~omega_t[i,:])[0]
                weights = T[searcher.site,free_sites]
                total = weights.sum()
                # nowhere to go: leave the searcher in place for this step
                if not total > 0 :
                    continue
                # now get the next site
                next_site = np.random.choice(free_sites,p=weights/total)
                # update omega matrix
                omega_t[i,searcher.site] = False
                omega_t[i,next_site] = True
                # update searcher
                searcher.site = next_site
                searcher.td = step + np.random.exponential(scale=site_taus[searcher.site])
    return omega_t

In [None]:
def init_omega_t(N,n,mu,sigma,seed=None) :
    """
    Builds a random initial occupancy matrix.

    Parameters
    ----------
    N : number of systems (rows of the matrix)
    n : number of sites per system (columns of the matrix)
    mu, sigma : mean and standard deviation of the normal distribution from
        which the number of searchers of each system is drawn; the drawn
        value is truncated to an integer and clamped to the range [1, n]
    seed : if not None, used to seed numpy's global random number generator

    Returns
    -------
    Boolean array of shape (N,n); row i has True on the randomly chosen
    sites that are initially occupied in system i.
    """
    # init the random number generator if it was passed
    if seed is not None :
        np.random.seed(seed)
    # init searcher numbers
    m = np.random.normal(loc=mu,scale=sigma,size=N).astype(np.int32)
    # ensures that no system has more searchers than available sites and that
    # every system has at least one searcher
    m[m>n] = n
    m[m<=1] = 1
    # init omega matrix
    omega_t = np.zeros((N,n),dtype=bool)
    # fill with initial occupancy: m_i distinct sites per system
    for i,m_i in enumerate(m) :
        omega_t[i,np.random.choice(n,m_i,replace=False)] = True
    return omega_t

### Initialization of the system

We first initialize the constants of the system, and the $\Omega (t)$ matrix.

In [None]:
# init constants of the system (used by the cells below)
N = 100                                  # number of equivalent systems (rows of the omega matrix)
n = 340                                   # number of sites in each system (columns of the omega matrix)

Concerning the site-specific affinity, I'll start by considering a special case, in which most sites have a low affinity, some have medium affinity, and very few have high affinity.

In [None]:
# indices of the sites with medium (Msites) and high (Hsites) characteristic time
Msites = [5,100,150,200,250,300]
Hsites = [50,175]
# every site starts with tau = 1; the selected sites are then overridden
site_taus = np.ones(n)
site_taus[Msites] = 5.0
site_taus[Hsites] = 20.0
# plot it
fig = plt.figure(figsize=(10,3))
plt.bar(np.arange(n),site_taus)
plt.xlabel("Site index")
plt.ylabel(r"$\tau$")
plt.show()

## Uniform transition probability
Let's start with the simple case in which the transition matrix is uniform. That is to say, the probability of jumping from any site to any other site is constant.

In [None]:
# uniform transition matrix: every entry equals 1/n, so each row sums to one
Tflat = np.full((n,n),1.0/n)

In [None]:
%%time
nsteps = 1000
sigma = 2
seed = 9889765
mus = [1,10,20]
# seed numpy's global generator once, so that both the initial conditions
# and the simulations below are reproducible from run to run
np.random.seed(seed)
# final occupancy matrix of each simulation, keyed by the value of mu
sims = {}
for mu in mus :
    print("Mu = %d"%(mu))
    omega_t_initial = init_omega_t(N,n,mu,sigma)
    sims[mu] = run_chair_simulation(nsteps,omega_t_initial,Tflat,site_taus)

In [None]:
fig,axarr = plt.subplots(len(mus),1,figsize=(10,5))
# plt.subplots returns a bare Axes (not an array) when a single panel is
# requested: wrap it so that indexing works for any number of mus
axes = np.atleast_1d(axarr)
x = np.arange(n)
for i,mu in enumerate(mus) :
    ax = axes[i]
    # show_xaxis is switched on for the last (bottom) panel only
    show_xaxis = (i == len(mus)-1)
    # occupancy vector: occupancy of each site summed over all the systems
    mbt.line_plot(ax,x,sims[mu].sum(axis=0),show_xaxis=show_xaxis,color='b')
    ax.text(0.8,0.8,r'$\mu = %.1f$'%(mu),transform=ax.transAxes,fontsize=18)

plt.show()

In [None]:
fig = plt.figure(figsize=(10,10))
# plt.matshow opens a new figure unless told otherwise: pass the number of
# the figure created above so that the requested figsize is actually used
plt.matshow(sims[1],fignum=fig.number)

In [None]:
# low-affinity sites: every site that is neither in Hsites nor in Msites
# (built as a list so that it works whether range() returns a list or not)
special_sites = set(Hsites) | set(Msites)
Lsites = [site for site in range(n) if site not in special_sites]

In [None]:
# ratio between the mean occupancy of the high- (H) or medium- (M) affinity
# sites and the mean occupancy of the low-affinity (L) sites, for each mu
H_to_L = np.zeros(len(mus))
M_to_L = np.zeros(len(mus))
for i,mu in enumerate(mus) :
    # occupancy of each site summed over all the systems
    traffic = sims[mu].sum(axis=0)
    low = traffic[Lsites].mean()
    H_to_L[i] = traffic[Hsites].mean()/low
    M_to_L[i] = traffic[Msites].mean()/low

In [None]:
# mean occupancy of the high-affinity (blue) and of the medium-affinity (red)
# sites, relative to that of the low-affinity sites, as a function of mu
plt.plot(mus,H_to_L,color='b')
plt.plot(mus,M_to_L,color='r')