<a href="https://colab.research.google.com/github/pachterlab/GVFP_2021/blob/dev/gg220810_bf_calculation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Bayes factor analysis

This short notebook imports the `.pickle` files containing PyMC3 fits to the $\Gamma$-OU and CIR models, computes the $\ln$ Bayes factors (the log of the ratio of the model-specific marginal likelihoods, CIR over $\Gamma$-OU, given a uniform prior), and then stores them to disk as `bfs.npy`.

In [1]:
# Shell escape: clone the `dev` branch of the GVFP_2021 repository into ./GVFP_2021.
!git clone --branch dev https://github.com/pachterlab/GVFP_2021.git

Cloning into 'GVFP_2021'...
remote: Enumerating objects: 2265, done.[K
remote: Counting objects: 100% (2265/2265), done.[K
remote: Compressing objects: 100% (1199/1199), done.[K
remote: Total 2265 (delta 1051), reused 2265 (delta 1051), pack-reused 0[K
Receiving objects: 100% (2265/2265), 896.58 MiB | 30.67 MiB/s, done.
Resolving deltas: 100% (1051/1051), done.
Checking out files: 100% (767/767), done.


In [2]:
# Shell escape: move the clone's contents into the current working directory.
# The `*` glob does not match dot-entries, so hidden files/directories stay behind in GVFP_2021.
!mv GVFP_2021/* .

In [3]:
# Shell escape: recursively delete the GVFP_2021 directory.
!rm -r GVFP_2021

# Helper functions

In [4]:
import pymc3 as pm
import theano.tensor as tt
class LogLike(tt.Op):
    """Theano Op that delegates log-likelihood evaluation to a Python callable.

    The Op takes one vector of parameter values and produces one scalar,
    obtained by calling ``likelihood(phi, mx, data)`` with the ``mx`` and
    ``data`` objects supplied at construction time.
    """

    # Input signature: a single double-precision vector (the parameter values).
    itypes = [tt.dvector]
    # Output signature: a single double-precision scalar (the log-likelihood).
    otypes = [tt.dscalar]

    def __init__(self, mx, data, likelihood):
        """Keep the fixed arguments and the callable used by ``perform``.

        Parameters
        ----------
        mx : passed through unchanged as the second argument of ``likelihood``.
        data : passed through unchanged as the third argument of ``likelihood``.
        likelihood : callable invoked as ``likelihood(phi, mx, data)``.
        """
        self.mx, self.data, self.likelihood = mx, data, likelihood

    def perform(self, node, inputs, outputs):
        """Evaluate the wrapped callable and write its result to the output slot.

        ``inputs`` must hold exactly one element (the parameter vector);
        the result is stored in ``outputs[0][0]`` as a NumPy array.
        """
        # Exactly one input is expected: the vector of parameters.
        (params,) = inputs
        log_likelihood = self.likelihood(params, self.mx, self.data)
        # Relies on the module-level name `np` (numpy) being defined by the
        # time perform() is called.
        outputs[0][0] = np.array(log_likelihood)
def ll_CIR(phi, mx, data):
    """Placeholder with the signature of a CIR log-likelihood; always returns None.

    NOTE(review): presumably kept only so that pickled objects referring to
    `ll_CIR` can be resolved when loaded -- confirm.
    """
    return None
def ll_GOU(phi, mx, data):
    """Placeholder with the signature of a Gamma-OU log-likelihood; always returns None.

    NOTE(review): presumably kept only so that pickled objects referring to
    `ll_GOU` can be resolved when loaded -- confirm.
    """
    return None

# BF import and calculation

In [7]:
import numpy as np
# Sample identifiers; each dataset name has the form 'allen_<sample>_glu'.
sample_names = ['C01', 'B08', 'H12', 'F08']
dataset_names = ['allen_{}_glu'.format(sample_id) for sample_id in sample_names]
n_datasets = len(dataset_names)

In [8]:
import pickle
# Genes for which both a Gamma-OU and a CIR trace are loaded.
BF_genes = [
    'Pou6f1', 'Gabra4', 'Aftph', 'Mark1',
    'Tmem65', 'Aplp2', 'Psma2', 'Ppp1r2',
    'Bace1', 'Srpk1', 'Nucks1', 'Ktn1',
]
n_BF_genes = len(BF_genes)

# One row per dataset, one column per gene; every entry starts as NaN
# and is overwritten inside the loop below.
BF = np.full((n_datasets, n_BF_genes), np.nan)

trace_dir = './loom/smc_results/four/'
for row, sample in enumerate(dataset_names):
    for col, gene in enumerate(BF_genes):
        # Both traces for a (dataset, gene) pair share this path prefix.
        prefix = trace_dir + sample + "_" + gene
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(prefix + '_GOU_trace.pickle', 'rb') as f:
            trace_GOU = pickle.load(f)
        with open(prefix + '_CIR_trace.pickle', 'rb') as f:
            trace_CIR = pickle.load(f)
        # ln Bayes factor = log evidence of CIR minus log evidence of Gamma-OU.
        log_ml_CIR = trace_CIR.report.log_marginal_likelihood
        log_ml_GOU = trace_GOU.report.log_marginal_likelihood
        BF[row, col] = log_ml_CIR - log_ml_GOU
np.save('bfs.npy', BF)

In [9]:
# Display the ln Bayes factors (rows: datasets, columns: genes) to two decimals.
np.round(BF, 2)

array([[ -14.98,  -53.16,  -28.03,  -12.13,   25.  ,   74.81,   -1.51,
          -0.8 ,    2.77,   10.77,    0.88,    0.31],
       [ -43.08, -120.86,  -32.08,  -30.94,   70.02,   44.11,   59.4 ,
          18.92,    3.01,    3.81,   -5.24,    1.4 ],
       [  -4.62,  -15.23,   -2.83,   -2.27,   23.85,   70.42,   17.84,
           8.01,    0.74,   16.71,   12.52,    5.36],
       [ -28.79, -105.58,  -21.11,   -8.42,   47.67,   75.22,   19.4 ,
          11.49,   -2.15,    3.67,  -10.48,    7.79]])