In [1]:
import numpy as np
import scipy.special
import scipy.stats as st

import bokeh.io
import bokeh.plotting

# Direct Bokeh output to the notebook (inline) rather than to a separate file
bokeh.io.output_notebook()

# Passed as `notebook_url` to bokeh.io.show() when the interactive app is embedded.
# NOTE(review): presumably this must match the host:port the notebook is served from — confirm
notebook_url = "localhost:8888"

To compute the convolution of a Negative Binomial and a Poisson distribution, we brute-force the sum, using the log-sum-exp trick. This is a slow calculation because we are using `scipy.stats`, which makes the interactive plot slow to respond; that is why I'm using throttling below. We could speed this up by not using SciPy and instead hand-computing the log PMF and applying the log-sum-exp trick.

Note that we [can't throttle range changes](https://github.com/bokeh/bokeh/issues/10654), so that will *really* lag if we change the x-axis. So, maybe choose a bigger x-range using `x_min` and `x_max` below.

In [2]:
def pois_nbinom_pmf(n, lam, alpha, beta):
    """Return f(n; lambda, alpha, beta) for the convolution of a Poisson and Negative Binomial.

    Parameters
    ----------
    n : int or 1-D array-like of int
        Count(s) at which to evaluate the PMF.
    lam : float
        Mean of the Poisson component.
    alpha, beta : float
        Parameters of the Negative Binomial component, which is evaluated
        with SciPy's `nbinom(alpha, beta / (1 + beta))`.

    Returns
    -------
    float or ndarray
        A single probability for a single `n`, otherwise an array with one
        probability per entry of `n`.
    """
    # np.ndim (rather than np.isscalar) also treats 0-d arrays as a single value
    if np.ndim(n) == 0:
        return _pois_nbinom_pmf_single_n(n, lam, alpha, beta)

    return np.array([_pois_nbinom_pmf_single_n(n_, lam, alpha, beta) for n_ in n])


def _pois_nbinom_pmf_single_n(n, lam, alpha, beta):
    """Return f(n; lambda, alpha, beta) for the convolution of a Poisson and Negative Binomial.
    This is for a single value of n.

    The PMF is the sum over k = 0, ..., n of
    NBinom(n - k; alpha, beta / (1 + beta)) * Poisson(k; lam),
    accumulated in log space with log-sum-exp.
    """
    # A negative count cannot be split into two non-negative parts. Return 0
    # explicitly: the sum below would be empty, which logsumexp does not
    # accept on every SciPy release.
    if n < 0:
        return 0.0

    # k is the part of the count attributed to the Poisson component
    k = np.arange(0, n + 1)
    terms = st.nbinom.logpmf(n - k, alpha, beta / (1 + beta)) + st.poisson.logpmf(
        k, lam
    )

    return np.exp(scipy.special.logsumexp(terms))


def pois_gene_nbinom_transript_pmf(n, lam, alpha, beta, n_std=5):
    """Model where there are g gene copies with g ~ Pois(lam).

    Given g >= 1 copies, the count n is evaluated with SciPy's
    `nbinom(g * alpha, beta / (1 + beta))`. With g = 0 copies the count is
    zero with probability one. The copy number g is marginalized out with a
    truncated sum.

    Parameters
    ----------
    n : int or array-like of int
        Count(s) at which to evaluate the PMF. Lists, scalars, and arrays of
        any shape are accepted.
    lam : float
        Mean of the Poisson-distributed gene copy number.
    alpha, beta : float
        Per-copy Negative Binomial parameters (see above).
    n_std : int or float, default 5
        Half-width, in (rounded-up) Poisson standard deviations, of the
        window of g values included in the sum.

    Returns
    -------
    ndarray or numpy scalar
        P(n | lam, alpha, beta), with the same shape as `n`.
    """
    # Coerce up front: a plain list would make `n == 0` a single Python bool,
    # and the g = 0 contribution below would silently be skipped.
    n = np.asarray(n)

    # Positive g values to include: integer mean ± n_std stds of the Poisson
    g_mean_int = int(lam)
    g_std = int(np.ceil(np.sqrt(lam)))
    g_min = max(1, int(g_mean_int - n_std * g_std))
    g_max = int(g_mean_int + n_std * g_std) + 1
    g_vals = np.arange(g_min, g_max + 1)

    # Put g on a leading axis so it broadcasts against n of any shape
    g_col = g_vals.reshape((-1,) + (1,) * n.ndim)

    # log P(n, g | lam, alpha, beta) for g ≥ 1
    log_joint = st.poisson.logpmf(g_col, lam) + st.nbinom.logpmf(
        n, g_col * alpha, beta / (1 + beta)
    )

    # Marginalize over g to get P(n | lam, alpha, beta)
    probs = np.exp(scipy.special.logsumexp(log_joint, axis=0))

    # Special case: with no gene copies (g = 0) the count is n = 0
    return probs + np.where(n == 0, st.poisson.pmf(0, lam), 0.0)

Now, we build the plot.

In [4]:
# Initial extent of the x-axis
x_min, x_max = 0, 1000


# Labels, axis scaling, and frame size of the figure
p_x_axis_label, p_y_axis_label = "n", "f(n; μ, r, b)"
p_y_axis_type = "log"
title = "Poisson-NBinom"
kwargs = {"frame_height": 200, "frame_width": 350}

p_p = bokeh.plotting.figure(
    title=title,
    x_axis_label=p_x_axis_label,
    y_axis_label=p_y_axis_label,
    y_axis_type=p_y_axis_type,
    x_range=[x_min, x_max],
    **kwargs,
)

# One slider per parameter; all three share the same range, step, and starting value
mu_slider, r_slider, b_slider = (
    bokeh.models.Slider(start=0.1, end=100, value=5, step=0.1, title=symbol)
    for symbol in ("μ", "r", "b")
)

# Integer n values spanning the non-negative part of the current x-range
x = np.arange(
    max(0, int(p_p.x_range.start)),
    max(0, int(np.floor(p_p.x_range.end))) + 1,
)

# PMF at those n; the b slider value is inverted to give the beta argument
y_p = pois_gene_nbinom_transript_pmf(
    x, lam=mu_slider.value, alpha=r_slider.value, beta=1 / b_slider.value
)

# Data source shared by the glyphs and updated by the callback
source_p = bokeh.models.ColumnDataSource(data={"x": x, "y_p": y_p})

# Recompute the PMF whenever a slider value or the x-range changes
def callback(attr, old, new):
    """Refresh `source_p` for the current slider values and visible x-range.

    The (attr, old, new) arguments are not used; the current state is read
    directly from the sliders and from `p_p.x_range`.
    """
    # Clip to non-negative integers
    first_n = max(0, int(p_p.x_range.start))
    stop_n = max(0, int(p_p.x_range.end) + 1)
    n_vals = np.arange(first_n, stop_n)

    pmf_vals = pois_gene_nbinom_transript_pmf(
        n_vals, mu_slider.value, r_slider.value, 1 / b_slider.value
    )

    # Replace the whole data dict so x and y_p are swapped together
    source_p.data = {"x": n_vals, "y_p": pmf_vals}


# Link sliders and x-range to the callback. The callback is laggy, so the
# sliders fire only on "value_throttled"; the x-range start/end changes are
# wired without throttling.
mu_slider.on_change("value_throttled", callback)
r_slider.on_change("value_throttled", callback)
b_slider.on_change("value_throttled", callback)
p_p.x_range.on_change("start", callback)
p_p.x_range.on_change("end", callback)


# Plot PMF. scatter() draws circle markers by default and replaces
# circle(size=...), which is deprecated as of Bokeh 3.4.
p_p.scatter("x", "y_p", source=source_p, size=5)
if p_y_axis_type != "log":
    # Stems from zero are drawn only when the y-axis is not logarithmic
    p_p.segment(x0="x", x1="x", y0=0, y1="y_p", source=source_p, line_width=2)


# Stack the three sliders above the plot
layout = bokeh.layouts.column(mu_slider, r_slider, b_slider, p_p)


def app(doc):
    """Bokeh application entry point: add the slider/plot layout to `doc`.

    NOTE(review): `layout` is built once at module level, so every call adds
    the same model objects. Bokeh models can belong to only one document —
    confirm that only a single session is ever opened, or build the layout
    inside this function.
    """
    doc.add_root(layout)


# Embed the app in the notebook output, using the notebook_url configured earlier
bokeh.io.show(app, notebook_url=notebook_url)

In [5]:
# A fun one

n = np.arange(0, 4001)
pmf = pois_gene_nbinom_transript_pmf(n, 5, 30, 1 / 10, n_std=5)

# Axis labels are reused from the interactive plot's configuration
p = bokeh.plotting.figure(
    frame_width=350,
    frame_height=250,
    x_axis_label=p_x_axis_label,
    y_axis_label=p_y_axis_label,
)
# scatter() draws default-size circle markers; the marker form of circle()
# is deprecated as of Bokeh 3.4.
p.scatter(n, pmf)
p.segment(x0=n, x1=n, y0=0, y1=pmf, line_width=2)

bokeh.io.show(p)