# scRNA cell population change test

The purpose of this method is to test whether cell population sizes differ significantly across $M$ covariates $X\in\mathbb{R}^{N\times M}$. We assume the following data-generating process for the cell population counts $y_{ik}$ of $i\in\{1,..,N\}$ samples and $k\in\{1,..,K\}$ detected cell populations:

\begin{align}
y_i &\sim\text{DirMult}(y_i^+,\gamma_i)\\
y_i^+ &= \sum_{k=1}^K y_{i,k}\\
\log(\gamma_{ik}) &= \alpha_k + x_i^T\beta_{\cdot,k} + V_{ik} \\
\alpha &\sim \text{MvN}(\mu_0, \sigma_{0}^2 I)\\
\beta_{j,k} &\sim \text{N}(0, \nu_{\beta,j}^2\sigma_{\beta,k}^2)\\
V_i &\sim \text{MvN}(\mathbf{0}, \Sigma)\\
\sigma_0^2 &\sim \text{HalfCauchy}(0, 5)\\
\sigma_{\beta,k}^2 &\sim \text{HalfCauchy}(0, 5)\\
\nu_{\beta,j}^2 &\sim \text{HalfCauchy}(0, 1)\\
\Sigma &\sim \text{Inv-Wishart}(\Psi_0, \rho_0)
\end{align}

$V_i$ captures the unobserved characteristics of subject $i$ that are associated with the mean count of each cell type $k$, and accounts for within-subject correlations.

In [None]:
%matplotlib inline
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import csv
import IPython
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import tensorflow as tf
import tensorflow_probability as tfp
import warnings

from tensorflow_probability import edward2 as ed

# Use the ggplot look for every figure produced in this notebook.
plt.style.use('ggplot')

# Short aliases used by the definitions below; none of the imports above
# bind `tfd` or `tfb`, so without these lines the cell fails with NameError.
tfd = tfp.distributions
tfb = tfp.bijectors

class MVNCholPrecisionTriL(tfd.TransformedDistribution):
  """Multivariate normal given by a location and a Cholesky precision factor.

  A standard normal vector shaped like `loc` is pushed through the inverse of
  an adjoint affine map built from `chol_precision_tril`, and then shifted by
  `loc`.
  """

  def __init__(self, loc, chol_precision_tril, name=None):
    # Base distribution: independent unit normals, with the last axis treated
    # as the event dimension.
    unit_normal = tfd.Independent(
        tfd.Normal(tf.zeros_like(loc), scale=tf.ones_like(loc)),
        reinterpreted_batch_ndims=1)

    # The two bijectors are built in the same order as they appear in the
    # chain: the shift first, then the inverted adjoint scaling.
    shift_by_loc = tfb.Affine(shift=loc)
    unscale_by_precision = tfb.Invert(
        tfb.Affine(scale_tril=chol_precision_tril, adjoint=True))
    to_target = tfb.Chain([shift_by_loc, unscale_by_precision])

    super(MVNCholPrecisionTriL, self).__init__(
        distribution=unit_normal,
        bijector=to_target,
        name=name)

def make_model(X,y,n,m,k):
    """
    Build the Dirichlet-multinomial model for cell population counts.

    The log-concentration of every sample is the sum of a per-population
    intercept, a linear covariate effect and a correlated random effect.

    :param X: The feature matrix, used as an (n, m) matrix in the product
        with the (m, k) coefficients
    :param y: cell counts; summed over the last axis to obtain the total
        count of each sample (assumed shape (n, k) -- TODO confirm with caller)
    :param n: number of data
    :param m: number of covariates
    :param k: number of cell populations
    :returns: The edward2 DirichletMultinomial random variable "cell_pop"
    """

    # hyperpriors; float arguments keep the draws floating point
    # NOTE(review): the draws are used directly as scales, while the model
    # description places the half-Cauchy priors on variances -- confirm.
    nu = ed.HalfCauchy(loc=0., scale=1., sample_shape=m, name="nu_beta")
    sigma_beta = ed.HalfCauchy(loc=0., scale=5., sample_shape=k,
                               name="sigma_beta")
    # A single shared scale: scale_identity_multiplier expects one scalar per
    # distribution, not one value per cell population.
    sigma_alpha = ed.HalfCauchy(loc=0., scale=5., name="sigma_alpha")

    # hyperprior for the random effect: Cholesky factor of its precision
    # matrix, drawn as a random variable from a Wishart with a single
    # identity scale matrix (no batch dimension).
    chol_precision = ed.Wishart(df=float(k + 1),
                                scale_tril=tf.eye(k),
                                input_output_cholesky=True,
                                name='Sigma')

    # random effect: n draws of a k-vector. MVNCholPrecisionTriL itself takes
    # no sample_shape, so it is wrapped in an edward2 random variable.
    V = ed.RandomVariable(
        MVNCholPrecisionTriL(loc=tf.zeros(k),
                             chol_precision_tril=chol_precision,
                             name="V"),
        sample_shape=n)

    # priors
    alpha = ed.MultivariateNormalDiag(loc=tf.zeros(k),
                                      scale_identity_multiplier=sigma_alpha,
                                      name="alpha")
    # Outer product gives one scale per (covariate, population) pair; these
    # are per-element scales, hence scale_diag.
    beta = ed.MultivariateNormalDiag(
        loc=tf.zeros([m, k]),
        scale_diag=tf.tensordot(nu, sigma_beta, axes=0),
        name="beta")

    # Total count per sample, i.e. the row sums of the observed counts.
    n_total = tf.reduce_sum(tf.cast(y, tf.float32), axis=-1)

    # (n, m) x (m, k) -> (n, k); alpha broadcasts over the samples.
    log_gamma = alpha + tf.matmul(tf.cast(X, tf.float32), beta) + V

    y_pred = ed.DirichletMultinomial(total_count=n_total,
                                     concentration=tf.exp(log_gamma),
                                     name="cell_pop")
    return y_pred
    
    
    
    
    
    
    