In [6]:
import pandas
import pystan

NeuronTopGeneGuides = pandas.read_csv("~/sgRNA/tiling/Neuron/NeuronTopGeneGuides.txt", sep='\t', header=0)
NeuronTopGeneGuides.head()

NeuronTopGeneGuides['gene'] = NeuronTopGeneGuides['gene'].astype('category')

mixture_model = """
data {
  int<lower=1> n_sgRNAs;
  int<lower=1> n_genes;
  real mu1;
  real<lower=0, upper=1> q0;
  real x[n_sgRNAs];
  int<lower=0, upper=n_genes> gene_ids[n_sgRNAs];
}
parameters {
  real mu_g[n_genes];
  real<lower=0, upper=1> q[n_genes];
  real<lower=0> sigma_g;
  real<lower=0> sigma1;
  real mu0;
  real<lower=0> sigma0;
}
model{
  mu_g ~ normal(mu1, sigma_g);
  mu0 ~ normal(0, 0.1);
  sigma0 ~ normal(0, 1);
  sigma_g ~ cauchy(0, 1);
  q ~ beta(q0*4/(1 - q0), 4);
  sigma1 ~ cauchy(0, 1);
  for (i in 1:n_sgRNAs){
    target += log_mix(q[gene_ids[i]], 
                      normal_lpdf(x[i] | mu_g[gene_ids[i]], sigma1), 
                      normal_lpdf(x[i] | mu0, sigma0)); 
  }
}
"""

sgRNAdata = {'n_sgRNAs' : NeuronTopGeneGuides.shape[0],
             'n_genes' : NeuronTopGeneGuides['gene'].nunique(),
             'mu1' : 1.7,
             'q0' : 0.2,
             'x' : NeuronTopGeneGuides['log2fc'],
             'gene_ids' : NeuronTopGeneGuides['gene'].cat.codes + 1 # stan starts counting at 1
            }

neuron_stan_fit = pystan.stan(model_code = mixture_model,
                             data=sgRNAdata, iter=2000, chains=4)
print(neuron_stan_fit)


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_83d9cc82176032cc96b5f1f9bcbe2f0d NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


Inference for Stan model: anon_model_83d9cc82176032cc96b5f1f9bcbe2f0d.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
mu_g[1]    1.71  4.8e-3   0.33   1.03   1.57    1.7   1.84   2.46 4697.0    1.0
mu_g[2]    1.59  8.3e-3   0.32    0.8   1.45   1.65   1.75   2.16 1502.0    1.0
mu_g[3]    1.72  4.8e-3   0.33    1.0   1.58   1.71   1.85   2.49 4797.0    1.0
mu_g[4]    1.72  4.8e-3   0.33   1.05   1.59   1.71   1.83   2.58 4720.0    1.0
mu_g[5]     1.7  4.4e-3    0.3   1.04   1.57    1.7   1.82   2.37 4878.0    1.0
mu_g[6]    1.76  7.0e-3   0.35    1.1   1.61   1.72   1.89   2.63 2556.0    1.0
mu_g[7]    1.67  5.1e-3   0.34   0.92   1.53   1.68   1.81    2.4 4620.0    1.0
mu_g[8]     1.7  5.0e-3   0.33   0.99   1.57    1.7   1.83   2.42 4414.0    1.0
mu_g[9]    1.75  4.8e-3   0.31   1.12   1.61   1.71   1.86   2.48 4255.0    1.0
mu_g[10]   

In [7]:

SelfRenewalTopGeneGuides = pandas.read_csv("~/sgRNA/tiling/SelfRenewal/SelfRenewalTopGeneGuides.txt", sep='\t', header=0)
SelfRenewalTopGeneGuides.head()

SelfRenewalTopGeneGuides['gene'] = SelfRenewalTopGeneGuides['gene'].astype('category')


sgRNAdata = {'n_sgRNAs' : SelfRenewalTopGeneGuides.shape[0],
             'n_genes' : SelfRenewalTopGeneGuides['gene'].nunique(),
             'mu1' : 4.3,
             'q0' : 0.5,
             'x' : SelfRenewalTopGeneGuides['log2fc'],
             'gene_ids' : SelfRenewalTopGeneGuides['gene'].cat.codes + 1 # stan starts counting at 1
            }

selfrenewal_stan_fit = pystan.stan(model_code = mixture_model,
                             data=sgRNAdata, iter=2000, chains=4)
print(selfrenewal_stan_fit)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_83d9cc82176032cc96b5f1f9bcbe2f0d NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


Inference for Stan model: anon_model_83d9cc82176032cc96b5f1f9bcbe2f0d.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
mu_g[1]    3.42  8.6e-3    0.6    2.3    3.0   3.41   3.82   4.67 4958.0    1.0
mu_g[2]    3.27    0.01   0.79   1.75   2.72   3.25   3.79    4.9 4179.0    1.0
mu_g[3]     2.5  6.6e-3   0.42   1.71   2.22    2.5   2.78   3.37 3942.0    1.0
mu_g[4]    2.96  4.8e-3   0.31   2.37   2.76   2.96   3.16   3.58 4077.0    1.0
mu_g[5]    4.22  9.5e-3   0.61   3.04   3.81   4.21   4.63   5.42 4167.0    1.0
mu_g[6]     2.9  6.8e-3   0.42   2.12   2.61   2.89   3.17   3.76 3758.0    1.0
mu_g[7]    3.51  4.0e-3   0.31   2.93    3.3   3.51   3.72   4.11 5811.0    1.0
mu_g[8]    3.17  5.6e-3   0.39    2.4    2.9   3.16   3.42   3.96 4994.0    1.0
mu_g[9]    4.51    0.01   0.84   2.89   3.92   4.49   5.07   6.15 4550.0    1.0
mu_g[10]   