In [3]:
import itertools
import warnings

# Our numerical workhorses
import numpy as np
import pandas as pd
import scipy.stats as st
import scipy.special

# The MCMC Hammer
import emcee

#import numba

# BE/Bi 103 utilities
import bebi103

# Import plotting tools
import matplotlib.pyplot as plt
import seaborn as sns
import corner

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables high res graphics inline (only use with static plots (non-Bokeh))
# SVG is preferred, but there is a bug in Jupyter with vertical lines
%config InlineBackend.figure_formats = {'png', 'retina'}

# JB's favorite Seaborn settings for notebooks
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 18, 
      'axes.titlesize': 18, 
      'axes.facecolor': 'DFDFE5'}
sns.set_context('notebook', rc=rc)
sns.set_style('darkgrid', rc=rc)

# Suppress future warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [4]:
df = pd.read_csv("./data/gardner_hw6/gardner_mt_catastrophe_only_tubulin.csv", comment = "#")

In [41]:
df.shape

(692, 5)

In [4]:
tau = np.array([15,10,17])
t=100

#m=0
for i in range(2):
    if not tau[i]<tau[i+1]:
        sum_z = -np.inf
        break
        
    else:
        x=[] #d
        y=[] #num
        z=[] #function output
        x_array_product=[] #den

        for j in range(3):
            y.append( (tau[j]*np.exp(-t/tau[j]) ))

            for k in range(3):
                if k != j:
                    x.append(tau[j] - tau[k])
            x_array=np.array(x)
            x_array_product.append(np.prod(x_array))

            z.append(y[j]/x_array_product[j])
            x=[]
            #m+=1
        z_array=np.array(z)
        if np.sum(z)>0:
            sum_z = np.log(np.sum(z))
        else:
            RaiseError("WTF man!?")
    break
            

In [7]:
def log_likelihood(tau, t, m):
    """
    Takes in tau (should be an array), 
    data, various values for m. We are very proud of this function.
    """
    tau = np.sort(tau)
    
    num_handler = [] #numerator
    den_handler = [] #denominator
    den_elements = [] #elements necessary to take products in den
    func_handler = [] #output log posterior
    
    for i in range(m-1):

        if not tau[i]<tau[i+1]:

            return -np.inf
            
        for j in range(m):
            num_handler.append(tau[j]**(m-2) * np.exp(-t/tau[j]))

            for k in range(m):
                if k != j:
                    den_elements.append(tau[j] - tau[k])

            den_handler.append(np.prod(den_elements))

            func_handler.append(num_handler[j]/den_handler[j])

            den_elements = []

    if np.sum(func_handler)>0:
        return np.log(np.sum(func_handler))

    else:
        #raise RuntimeError("WTF Man.")
        return -np.inf

def log_prior(tau, m):
    """
    Log prior for model defined above. Takes in m and tau, 
    returns log prior.
    """
    tau = np.sort(tau)
    
    for i in range(m-1):

        if not tau[i]<tau[i+1]:

            return -np.inf
    
    if not np.all(tau> 0):
        return -np.inf
    
    return -np.log(np.prod(tau))
        
#test_run = log_likelihood(tau, t, 3)

def tau_start(m, n_walkers, n_temps):
    p = np.empty((n_temps, n_walkers, m))
    
    for i in range(m):
        p[:,:,i]=np.random.exponential(1, (n_temps,n_walkers))
    return p


In [8]:
df["12 uM"].head()

0    25.000
1    40.000
2    40.000
3    45.429
4    50.000
Name: 12 uM, dtype: float64

In [32]:
n_burn = 500
n_steps = 5000
n_temps = 20
n_walkers = 50
m = 3
p0=tau_start(m, n_walkers, n_temps)
loglargs = (df["12 uM"], m)
logpargs = (m,)
columns = ["tau_1", "tau_2", "tau_3"]

In [14]:
import time
start = time.time()

df_out, lnZ_out, dlnZ_out = bebi103.run_pt_emcee(log_likelihood, log_prior, n_burn, n_steps, 
                    n_temps=n_temps, p0=p0, loglargs=loglargs, 
                    logpargs=logpargs, threads=None, columns=columns,
                    return_lnZ=True)

print('It took ', time.time()-start, ' seconds.')

It took  9055.81182718277  seconds.


In [39]:
df_out[2:100]


Unnamed: 0,tau_1,tau_2,tau_3,lnlike,lnprob,beta_ind,beta,chain
2,4.880407e+01,0.259209,7.960776,-0.266388,-4.878606,0,1,0
3,4.880407e+01,0.259209,7.960776,-0.266388,-4.878606,0,1,0
4,1.832288e+02,1256.812666,1.685551,-0.359224,-13.228386,0,1,0
5,1.832288e+02,1256.812666,1.685551,-0.359224,-13.228386,0,1,0
6,1.832288e+02,1256.812666,1.685551,-0.359224,-13.228386,0,1,0
7,1.832288e+02,1256.812666,1.685551,-0.359224,-13.228386,0,1,0
8,1.302385e+03,405.563423,0.269390,-0.726229,-12.591865,0,1,0
9,8.149814e+01,0.000058,1.242134,0.056101,5.195170,0,1,0
10,1.789196e+01,28.725441,40.295884,0.250834,-9.687549,0,1,0
11,1.789196e+01,28.725441,40.295884,0.250834,-9.687549,0,1,0


In [17]:
df_out.to_csv('./ptmcmc_results_test1.csv')

In [13]:
a, b, c=5, 3, 2

In [14]:
np.log(np.exp(a) - np.exp(b) + np.exp(c))

4.910569464803074

In [15]:
a + np.log(1 - np.exp(b-a) + np.exp(c-a))

4.910569464803074

In [24]:
t = df["12 uM"][0]

In [33]:
df_out

NameError: name 'df_out' is not defined

In [27]:
tau = np.array([0.06463911, 0.04257534, 0.11927362])
z=log_likelihood(tau,t,3)

In [28]:
z

-205.56031209255227