In [1]:
# Add the sibling ARCH_package directory to the import search path; the
# `distributions` module imported below is presumably located there.
import sys
sys.path.append('../ARCH_package')

import distributions

In [2]:
import os
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio


# Plotting defaults shared by the figures in this notebook.
# `colors` is Plotly's default colour cycle; it is indexed later to give
# paired traces (exact vs approximation) the same colour.
colors = px.colors.DEFAULT_PLOTLY_COLORS
# Figures created from here on use the "simple_white" template.
pio.templates.default = "simple_white"


In [3]:
# Output directory for the figures exported by this notebook
path = '../Results/Distributions/'
# exist_ok=True creates missing parents and is a no-op when the directory
# already exists, avoiding the separate check-then-create step.
os.makedirs(path, exist_ok=True)

# Birth and death process distribution

The exact probability distribution of a birth and death process with fitness $s>0$ and initial clone size $x_s(t_0) = a$ at time $t_0$ is given by:
$$
\mathbb{P}\left\{ x_s(t) = m \vert x_s(t_0)=a \right\}=\begin{cases}
\sum_{j=0}^{\min\left(a,m\right)}{a \choose j}{a+m-j-1 \choose a-1}\alpha^{a-j}\beta^{m-j}\left(1-\alpha-\beta\right)^{j}, & m>0,\\
\alpha^{a}, & m=0.
\end{cases}
$$

where 
$$
\alpha=\frac{\lambda\left(e^{s\Delta t}-1\right)}{\left(\lambda + s\right)e^{ s\Delta t}-\lambda},\quad\beta=\frac{\left(  \lambda + s\right)\left(e^{s\Delta t}-1\right)}{\left(\lambda + s\right)e^{ s\Delta t}-\lambda}
$$
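and $\Delta t = t-t_0$. Here $\lambda$ is the death rate and $\lambda + s$ the birth rate, so that $s$ is the net growth rate per cell.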

The mean and variance of this distribution are
$$
\mu_{x_s}\left(t\right)=ae^{s\Delta t} \quad \text{and}\quad\sigma_{x_s}^{2}\left(t\right)=\frac{a\left(2\lambda + s\right)}{s}e^{s\Delta t}\left(e^{s \Delta t}-1\right).
$$

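In the limit $s\rightarrow 0$, L'Hôpital's rule gives $\alpha=\beta=\frac{\lambda \Delta t}{1+\lambda \Delta t}$. For a single initial cell ($a=1$) the distribution then simplifies to:
$$
\mathbb{P}\left\{ x_0(t)=m \vert x_0(t_0)=1 \right\} =\begin{cases}
\left( \frac{1}{1+\lambda \Delta t} \right)^{2}
\left(\frac{\lambda \Delta t}{1+\lambda \Delta t}\right)^{m-1}, & m>0\\
\frac{\lambda \Delta t}{1+\lambda \Delta t}, & m=0.
\end{cases}$$

For a general initial size $a$, the sum given above is evaluated with these limiting values of $\alpha$ and $\beta$. The mean and variance are

$$
\mu_{x_0}\left(t\right)=a\quad \text{and}\quad\sigma_{x_0}^{2}\left(t\right)=2\lambda a\Delta t.
$$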


In [4]:
# Probability of observing one clone size after a time interval, for a fixed parameter set
init_size, delta_t, fitness = 10, 3, 0

# Clone size at which the probability is evaluated
size = 10

distributions.exact_clone_size_prob(
    size=size,
    init_size=init_size,
    s=fitness,
    delta_t=delta_t,
)

0.04170105198676759

In [14]:
# Exact pmf over a window of clone sizes around the initial size
init_size = 20
delta_t = 1
fitness = 0

# Clone sizes from half the initial size up to (but excluding) twice the initial size
lower_size = max(0, init_size // 2)
upper_size = 2 * init_size
sizes = np.arange(lower_size, upper_size)

prob_sizes = distributions.exact_clone_size_pmf(
    sizes=sizes, init_size=init_size, s=fitness, delta_t=delta_t)

fig = px.line(x=sizes, y=prob_sizes)
layout_options = dict(
    title=f'Probability distribution of CBD starting at {init_size} cells',
    xaxis_title='Clone size',
    yaxis_title='Probability',
)
# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(**layout_options)

## Computational limit of the exact solution

Integer overflow makes the exact computation of the probability distribution unreliable for large clone sizes.

In [16]:
# Exact pmf for a range of initial clone sizes, all evaluated on one common size grid
init_sizes = [10, 20, 40, 60, 80, 100]
delta_t = 1
fitness = 0

# The evaluation grid does not depend on the initial size, so it is built once
sizes = np.arange(start=0, stop=150)

fig = go.Figure()
for i in init_sizes:
    # One trace per initial clone size, labelled by that size in the legend
    prob_sizes = distributions.exact_clone_size_pmf(sizes=sizes, delta_t=delta_t, init_size=i, s=fitness)
    fig.add_trace(go.Scatter(x=sizes, y=prob_sizes, name=str(i)))

fig.update_layout(title='Exact CBD probability distribution',
                  xaxis_title='Clone size',
                  yaxis_title='Probability',
                  yaxis_range=[0, 0.1],
                  legend=dict(title='Initial clone size'))

# Display the figure, then export it as a high-resolution PNG and as an SVG
fig.show()
fig.write_image(path + 'Exact distribution.png', scale=10)
fig.write_image(path + 'Exact distribution.svg')

## Approximating the distribution by a negative binomial

We approximate the distribution of a birth and death process by a negative binomial distribution parametrized by the mean and variance of the birth and death model.

$$\mathbb{P}\left\{ x_s(t)=m \vert x(t_0)= a \right\} = 
{m + \frac{\mu^2}{\sigma^2 - \mu} -1 \choose m }
\left(\frac{\sigma^2 - \mu} {\sigma^2}\right)^m 
\left(\frac{\mu} {\sigma^2}\right)^ {\frac{\mu^2}{\sigma^2 - \mu}},
$$

where $\mu$ and $\sigma^2$ are the mean and variance of the underlying birth and death process.

In [8]:
# Overlay the exact pmf (lines) and its negative binomial approximation (markers)
# for several initial clone sizes
init_sizes = [2, 10, 20, 40, 60]
delta_t = 1
fitness = 0

# The evaluation grid does not depend on the initial size, so it is built once
sizes = np.arange(start=0, stop=100)

fig = go.Figure()
for j, i in enumerate(init_sizes):
    exact_pmf = distributions.exact_clone_size_pmf(sizes=sizes, delta_t=delta_t, init_size=i, s=fitness)
    nb_pmf = distributions.nb_approx_pmf(size=sizes, init_size=i, s=fitness, delta_t=delta_t)

    # Exact curve and approximation share a colour; wrap around the palette
    # instead of assuming a fixed palette length
    trace_color = colors[j % len(colors)]
    fig.add_trace(
        go.Scatter(x=sizes, y=exact_pmf, name=f'exact {i}', line=dict(color=trace_color)))
    fig.add_trace(
        go.Scatter(x=sizes, y=nb_pmf, mode='markers', name=f'approx {i}', line=dict(color=trace_color)))

# Note: fig.update_yaxes(type='log') can be added here to inspect the tails on a log scale
fig.update_layout(title='Exact vs NB approximation of a CBD probability distribution',
                  xaxis_title='Clone size',
                  yaxis_title='Probability',
                  legend=dict(title='Initial clone size'))
fig.show()

# Export the comparison as a high-resolution PNG and as an SVG
fig.write_image(path + 'NegBinom comparison.png', scale=10)
fig.write_image(path + 'NegBinom comparison.svg')

The approximated NB distribution can now be evaluated at large clone sizes.

In [18]:
# Negative binomial approximation for many initial clone sizes, up to large clones
init_sizes = np.arange(10, 1_000, 50)
delta_t = 3
fitness = 0.1

# The evaluation grid does not depend on the initial size, so it is built once
sizes = np.arange(start=0, stop=1000)

fig = go.Figure()
for i in init_sizes:
    # One trace per initial clone size, labelled by that size in the legend
    prob_sizes = distributions.nb_approx_pmf(size=sizes, delta_t=delta_t, init_size=i, s=fitness)
    fig.add_trace(go.Scatter(x=sizes, y=prob_sizes, name=str(i)))

fig.update_layout(title='Approximated CBD probability distribution',
                  xaxis_title='Clone size',
                  yaxis_title='Probability',
                  yaxis_range=[0, 0.1],
                  legend=dict(title='Initial clone size'))

# Display the figure, then export it as a high-resolution PNG and as an SVG
fig.show()
fig.write_image(path + 'NegBinom large clones.png', scale=10)
fig.write_image(path + 'NegBinom large clones.svg')

## Computational time

In [19]:
# Parameters shared by the two %timeit cells that follow
init_size = 20
delta_t = 1
fitness = 0

# Array of clone sizes at which both pmfs are evaluated during timing
sizes = np.arange(10,100,10)

In [20]:
# Time the exact pmf on the `sizes` array with the parameters set in the preceding cell
%timeit distributions.exact_clone_size_pmf(sizes=sizes, delta_t=delta_t, init_size=init_size, s=fitness)

4.58 ms ± 22.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
# Time the negative binomial approximation on the same `sizes` array and parameters
%timeit distributions.nb_approx_pmf(size=sizes, delta_t=delta_t, init_size=init_size, s=fitness)

240 µs ± 2.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# Evaluating the probability distribution of VAF

Since a birth and death model is a discrete branching process, the probability distribution of clone sizes after a time interval is a discrete probability mass function.

It then follows that for any one-to-one map $g$, 
$$
\mathbb{P}\left\{ g(x_s(t))=m \vert g(x(t_0))= a \right\} 
= \mathbb{P}\left\{ x_s(t)=g^{-1}(m) \vert x(t_0)= g^{-1}(a) \right\}.$$

In particular, if 
$$
v(x) = \frac{x}{2(N+x)} 
\quad \text{and}\quad
v^{-1}(x) = \frac{2Nx}{1-2x} 
$$

it follows that
$$
\mathbb{P}\left\{ v(t)=m \vert v(t_0)= a \right\} = \mathbb{P}\left\{ x(t)=v^{-1}(m) \vert x(t_0) = v^{-1}(a) \right\}.
$$

Given $N$ we can then use $v^{-1}$ and the negative binomial distribution to approximate the probability distribution of VAF in a birth and death process.  

In [12]:
# Compare the approximated pmf in clone-size space with its counterpart in VAF space
init_size = 100
delta_t = 1
fitness = 0
N_w = 100_000

# Clone sizes from half to twice the initial size, and the matching VAF values
size = np.arange(init_size // 2, init_size * 2)
init_vaf = distributions.clone_size_to_vaf(x=init_size, N_w=N_w)
vaf = distributions.clone_size_to_vaf(x=size, N_w=N_w)

# NOTE(review): the other cells call `nb_approx_pmf`; confirm that `nb_approx`
# is the intended function here and still exists in the package.
vaf_prob = distributions.vaf_nb_approx(vaf, init_vaf, N_w, fitness, delta_t)
size_prob = distributions.nb_approx(size, init_size, fitness, delta_t)

# One panel per representation: (x values, probabilities, legend name, x-axis title)
panels = [
    (size, size_prob, 'Size', 'Clone size'),
    (vaf, vaf_prob, 'VAF', 'Clone VAF'),
]

fig = make_subplots(rows=1, cols=2)
for panel_col, (x_values, y_values, trace_name, x_title) in enumerate(panels, start=1):
    fig.add_trace(go.Scatter(x=x_values, y=y_values, name=trace_name), row=1, col=panel_col)
    fig.update_xaxes(title=x_title, row=1, col=panel_col)
# Only the left panel carries the y-axis title
fig.update_yaxes(title='Probability', row=1, col=1)

# Last expression of the cell: returns the figure so the notebook renders it
fig.update_layout(title='Probability mass function of clone size vs VAF',
                  legend=dict(title='Probability mass functions'))

## Evaluating the probability distribution of VAF gradients

We can then similarly compute the probability of observing a gradient using the inverse function of 

$$\Delta _v (t, t_0) = \frac{v(t)- v(t_0)}{t-t_0}$$

In [13]:
# Approximated pmf of VAF gradients over a window of clone sizes around the initial size
init_size = 100
delta_t = 1
fitness = 0
N_w = 10_000

init_vaf = distributions.clone_size_to_vaf(x=init_size, N_w=N_w)
# Map clone sizes (half to twice the initial size) to VAFs, then to gradients
# relative to the initial VAF over the time interval
sizes = np.arange(init_size // 2, init_size * 2)
vaf_sizes = distributions.clone_size_to_vaf(x=sizes, N_w=N_w)
vaf_gradients = (vaf_sizes - init_vaf) / delta_t

vaf_grad_prob = distributions.vaf_gradient_nb_approx(gradient=vaf_gradients, init_vaf=init_vaf, N_w=N_w,
                                                     s=fitness, delta_t=delta_t)

fig = px.line(x=vaf_gradients, y=vaf_grad_prob)
# The x values are VAF gradients, so the axis is labelled as such
fig.update_layout(title=f'Probability distribution of VAF gradients CBD starting at VAF {round(init_vaf,3)}',
                  xaxis_title='VAF gradient',
                  yaxis_title='Probability')

## Evaluating the probability distribution of VAF gradients in the presence of a fit clone

Consider 
$$\Delta _v (t, t_0) = \frac{v(t)- v(t_0)}{t-t_0},$$

with
$$v_t(n) = \frac{n(t)}{2(N_w+e^{s_f(t-t_f)} + n(t))}.
$$

Similar to the change of variables of VAF, we now have that 

$$v^{-1}_t(x) = \frac{2\left(N_w+e^{s_f(t-t_f)}\right)x(t)}{1-2x(t)}$$