In [4]:
import pandas
import pystan

# Load the tab-separated guide table (first row is the header) and store the
# `gene` column as a pandas categorical in the same expression, so that
# re-running this cell always rebuilds the frame from the file instead of
# mutating a previously loaded copy. The categorical codes are used further
# down to build the integer gene index passed to Stan.
NeuronTopGeneGuides = pandas.read_csv(
    "~/sgRNA/tiling/Neuron/NeuronTopGeneGuides.txt", sep='\t', header=0
).astype({'gene': 'category'})

# Fail fast on missing values: a missing gene gets categorical code -1, which
# becomes index 0 after the +1 shift used for Stan (Stan arrays are 1-based),
# and a missing log2fc cannot be evaluated by the likelihood.
if NeuronTopGeneGuides[['gene', 'log2fc']].isna().any().any():
    raise ValueError(
        "NeuronTopGeneGuides contains missing values in 'gene' or 'log2fc'"
    )

# Stan program: two-component Gaussian mixture over per-sgRNA values x[i].
#
# For sgRNA i belonging to gene g = gene_ids[i]:
#   with probability q[g]      x[i] ~ normal(mu_g[g], sigma1)
#   with probability 1 - q[g]  x[i] ~ normal(mu0, sigma0)
#
# Priors:
#   mu_g[g] ~ normal(mu1, sigma_g)   gene-level means share a common centre mu1
#   mu0     ~ normal(0, 0.5)
#   sigma0  ~ normal(0, 1)           (half-normal because of the <lower=0> bound)
#   sigma_g, sigma1 ~ cauchy(0, 1)   (half-Cauchy because of the <lower=0> bound)
#   q[g]    ~ beta(q0*4/(1 - q0), 4) prior mean of q[g] equals q0
#
# Data notes:
#   * gene_ids is used to index mu_g and q, and Stan arrays are 1-based, so it is
#     declared with lower=1; an index of 0 is rejected when the data are read
#     rather than failing later as an out-of-range index inside the model block.
#   * q0 is declared on [0, 1], but the beta shape q0*4/(1 - q0) is only positive
#     and finite for 0 < q0 < 1, so callers must pass a value strictly inside
#     that interval.
mixture_model = """
data {
  int<lower=1> n_sgRNAs;
  int<lower=1> n_genes;
  real mu1;
  real<lower=0, upper=1> q0;
  real x[n_sgRNAs];
  int<lower=1, upper=n_genes> gene_ids[n_sgRNAs];
}
parameters {
  real mu_g[n_genes];
  real<lower=0, upper=1> q[n_genes];
  real<lower=0> sigma_g;
  real<lower=0> sigma1;
  real mu0;
  real<lower=0> sigma0;
}
model{
  mu_g ~ normal(mu1, sigma_g);
  mu0 ~ normal(0, 0.5);
  sigma0 ~ normal(0, 1);
  sigma_g ~ cauchy(0, 1);
  q ~ beta(q0*4/(1 - q0), 4);
  sigma1 ~ cauchy(0, 1);
  for (i in 1:n_sgRNAs){
    target += log_mix(q[gene_ids[i]], 
                      normal_lpdf(x[i] | mu_g[gene_ids[i]], sigma1), 
                      normal_lpdf(x[i] | mu0, sigma0)); 
  }
}
"""

# Inputs for the Stan `data` block; the keyword names below must match the
# variable names declared there.
sgRNAdata = dict(
    n_sgRNAs=len(NeuronTopGeneGuides),              # one row per sgRNA
    n_genes=NeuronTopGeneGuides['gene'].nunique(),  # number of distinct genes
    mu1=1.7,   # centre of the prior on the gene-level means mu_g
    q0=0.2,    # prior mean of the per-gene mixing weight q
    x=NeuronTopGeneGuides['log2fc'],
    # Categorical codes are 0-based while Stan indexes arrays from 1.
    gene_ids=NeuronTopGeneGuides['gene'].cat.codes + 1,
)

# Compile the Stan program and sample from the posterior: 4 chains of 2000
# iterations each. A fixed seed is passed so that re-running the notebook
# reproduces the same draws and therefore the same printed summary; without it
# every run gives slightly different numbers.
sgRNA_stan_fit = pystan.stan(model_code=mixture_model,
                             data=sgRNAdata, iter=2000, chains=4,
                             seed=42)
# Printing the fit object shows the per-parameter posterior summary table.
print(sgRNA_stan_fit)


INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_ad0574e28225a1b354bee64a2c867c40 NOW.
  tree = Parsing.p_module(s, pxd, full_module_name)


Inference for Stan model: anon_model_ad0574e28225a1b354bee64a2c867c40.
4 chains, each with iter=2000; warmup=1000; thin=1; 
post-warmup draws per chain=1000, total post-warmup draws=4000.

           mean se_mean     sd   2.5%    25%    50%    75%  97.5%  n_eff   Rhat
mu_g[1]    1.73  6.4e-3   0.33   1.06   1.57   1.72   1.86   2.52 2760.0    1.0
mu_g[2]    1.58    0.01   0.34   0.73   1.42   1.64   1.74   2.15  894.0   1.01
mu_g[3]    1.73  5.4e-3   0.35   0.99   1.58   1.69   1.87   2.52 4212.0    1.0
mu_g[4]    1.74  5.2e-3   0.34   1.08   1.59    1.7   1.87   2.53 4232.0    1.0
mu_g[5]    1.71  5.1e-3   0.33   1.02   1.56   1.68   1.85   2.47 4265.0    1.0
mu_g[6]    1.76  6.4e-3   0.36   1.01   1.59   1.74   1.89   2.64 3210.0    1.0
mu_g[7]    1.66  5.4e-3   0.36   0.83   1.51   1.69   1.81   2.41 4390.0    1.0
mu_g[8]    1.71  5.6e-3   0.35   0.95   1.56   1.69   1.85   2.48 3857.0    1.0
mu_g[9]    1.75  4.8e-3   0.33    1.1    1.6    1.7   1.87   2.55 4666.0    1.0
mu_g[10]   

In [2]:

# Read the tab-separated SelfRenewal guide table; the first row of the file
# supplies the column names.
SelfRenewalTopGeneGuides = pandas.read_csv(
    "~/sgRNA/tiling/SelfRenewal/SelfRenewalTopGeneGuides.txt",
    sep='\t',
    header=0,
)
# Preview the first five rows as the cell's displayed output.
SelfRenewalTopGeneGuides.head(5)

Unnamed: 0,seq,chr,pos,start,end,strand,gene,guide,distance2TSS,log2fc
0,GCAGGGACCTGGGTGTATAGGCTAGACCCAAGGCACAGCTGTG,chr1,118624996,118624986,118625028,+,Tfcp2l1,GGGTGTATAGGCTAGACCCA,2930,4.211698
1,TTCAGGAGGTGGACACCACAGCTGTGCCTTGGGTCTAGCCTAT,chr1,118625012,118625002,118625044,-,Tfcp2l1,GGACACCACAGCTGTGCCTT,2914,2.988242
2,TTTACTTTCTGGGTTTGTTAAGAGCTTTCGGGGAAGAGGAAAG,chr1,118625779,118625769,118625811,-,Tfcp2l1,GGGTTTGTTAAGAGCTTTCG,2147,1.829315
3,ACTGAAGTTTGTCTTTGCTGGAGCCCTGAGGGGATGGGATGCC,chr1,118625882,118625872,118625914,+,Tfcp2l1,GTCTTTGCTGGAGCCCTGAG,2044,1.603842
4,AAATAAATCTGTTGGAGAGGAACTGCTTTCTGGCATCCCATCC,chr1,118625913,118625903,118625945,-,Tfcp2l1,GTTGGAGAGGAACTGCTTTC,2013,5.674412
