In [1]:
import numpy as np
import pandas as pd

import bebi103

import altair as alt
import altair_catplot as altcat

import bokeh.io
import bokeh.plotting
from bokeh.layouts import row, column
from bokeh.models import Range1d
import bokeh.application
import bokeh.application.handlers
bokeh.io.output_notebook()

Features requiring DataShader will not work and you will get exceptions.
  Features requiring DataShader will not work and you will get exceptions.""")


# Simple File Processing 

I had special names for all of my genotypes that I used to record data. For the purposes of plotting and for others understanding the data I renamed everything and made new dataframes.  


Implicit in all strains except N2 is syIs231 (which is hs:lin-3c). 

Implicit in all rescue strains except N2 and "+" is affl-2(sy975). 


**You may need to alter folders for files depending on your setup**

# Making Plots


First we load in our data.

In [2]:
# load data 

df_mut = pd.read_csv('data/mutant_lin3c.csv')
df_mut['Total'] = df_mut['Yes'].values +  df_mut['No'].values
print(df_mut['Genotype'].unique())

df_resc = pd.read_csv('data/rescue_lin3c.csv')
df_resc['Total'] = df_resc['Yes'].values +  df_resc['No'].values
print(df_resc['Genotype'].unique())


['N2' '+' 'hsf-1(sy1198)' 'hsf-1(sy441)' 'affl-2(sy975)'
 'affl-1(sy1220) affl-2(sy975)' 'affl-1(sy1202)']
['AFFL-2 Del+NLS::GFP' 'affl-2(sy975)' 'AFFL-2::GFP' 'N2'
 'AFFL-2 Del::GFP' '+' 'AFFL-2 FUSLC+NLS::GFP' 'AFFL-2 FUSLC*+NLS::GFP']


We define an order for the mutant strains and plot the results. 

In [3]:
order_mut = [('N2', 'Before'),
 ('N2', 'After'),
 ('+', 'Before'),
 ('+', 'After'),
 ('hsf-1(sy1198)', 'Before'),
 ('hsf-1(sy1198)', 'After'),
 ('hsf-1(sy441)', 'Before'),
 ('hsf-1(sy441)', 'After'),
 ('affl-2(sy975)', 'Before'),
 ('affl-2(sy975)', 'After'),
 ('affl-1(sy1202)', 'Before'),
 ('affl-1(sy1202)', 'After'),
 ('affl-1(sy1220) affl-2(sy975)', 'Before'),
 ('affl-1(sy1220) affl-2(sy975)', 'After')]

In [4]:

p = bebi103.viz.jitter(df_mut,
                       ['Genotype', 'Heat Shock'],
                       'Fraction Pumping',
                       horizontal=False,
                       plot_width=600,
                       plot_height = 300,
                       #palette=pp[::2],
                       line_color='black',
                       size = 7,
                     
                       
                       y_axis_label=None,
                       color_column = 'Heat Shock',
                       order = order_mut,
                       palette = [bokeh.palettes.Colorblind[3][1], 
                                  bokeh.palettes.Colorblind[3][0]],
                       
                       
                      # color = pp[0],
                     
                       x_axis_label = 'Genotype',
                      )
p.xgrid.grid_line_color = None

p.y_range = Range1d(-.05,1.1)
p.yaxis.axis_label_text_font_size = '8pt'
p.yaxis.axis_line_width = 2
p.xaxis.axis_line_width = 2

p.xaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks

bokeh.io.show(p)

We define an order for the AFFL-2 rescue strains and plot the results. 

In [5]:
df_resc['Genotype'].unique()

array(['AFFL-2 Del+NLS::GFP', 'affl-2(sy975)', 'AFFL-2::GFP', 'N2',
       'AFFL-2 Del::GFP', '+', 'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP'], dtype=object)

In [6]:
order_resc = [
 ('N2', 'Before'),
 ('N2', 'After'),

 ('+', 'Before'),
 ('+', 'After'),
  ('affl-2(sy975)', 'Before'),
 ('affl-2(sy975)', 'After'),
 ('AFFL-2::GFP', 'Before'),
 ('AFFL-2::GFP', 'After'),
  ('AFFL-2 Del::GFP', 'Before'),
 ('AFFL-2 Del::GFP', 'After'),
('AFFL-2 Del+NLS::GFP', 'Before'),
 ('AFFL-2 Del+NLS::GFP', 'After'),
 
 ('AFFL-2 FUSLC+NLS::GFP', 'Before'),
 ('AFFL-2 FUSLC+NLS::GFP', 'After'),
 ('AFFL-2 FUSLC*+NLS::GFP', 'Before'),
 ('AFFL-2 FUSLC*+NLS::GFP', 'After')]

In [7]:

p = bebi103.viz.jitter(df_resc,
                       ['Genotype', 'Heat Shock'],
                       'Fraction Pumping',
                       horizontal=False,
                       plot_width=600,
                       plot_height = 300,
                       #palette=pp[::2],
                       
                       color_column = 'Heat Shock',
                       line_color = 'black',
                       size = 7,
                       order = order_resc,
                       palette = [bokeh.palettes.Colorblind[3][1], 
                                  bokeh.palettes.Colorblind[3][0]],
                       
                       
                      # color = pp[0],
                       #show_legend = True,
                     
                       x_axis_label = 'Genotype',
                      )
p.xgrid.grid_line_color = None

p.y_range = Range1d(-.05,1.1)
p.yaxis.axis_label_text_font_size = '8pt'
p.yaxis.axis_line_width = 2
p.xaxis.axis_line_width = 2

p.xaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks

bokeh.io.show(p)

As you can see, the labels are very messy. Therefore, we used powerpoint to create our own custom labels. 

In [8]:
# now create dictionaries for model parameters 
def plot_with_error_bars(
    centers, confs, names, fill_color, line_kwargs={}, **kwargs
):
    """Make a horizontal plot of centers/conf ints with error bars.
    Parameters
    ----------
    centers : array_like, shape (n,)
        Array of center points for error bar plot.
    confs : array_like, shape (n, 2)
        Array of low and high values of confidence intervals
    names : list of strings
        Names of the variables for the plot. These give the y-ticks.
    marker_kwargs : dict, default {}
        Kwargs to be passed to p.circle() for plotting centers.
    line_kwargs : dict, default {}
        Kwargs passsed to p.line() to plot the confidence interval.
    kwargs : dict
        Any addition kwargs are passed to bokeh.plotting.figure().
    Returns
    -------
    output : Bokeh figure
        Plot of error bars.
    """
    n = len(names)
    if len(centers) != n:
        raise ValueError("len(centers) ≠ len(names)")
    if confs.shape != (n, 2):
        raise ValueError("Shape of `confs` must be (len(names), 2).")

    if "plot_height" not in kwargs and "frame_height" not in kwargs:
        kwargs["frame_height"] = 50 * n
    if "plot_width" not in kwargs and "frame_width" not in kwargs:
        kwargs["frame_width"] = 450
    line_width = kwargs.pop("line_width", 2)

    p = bokeh.plotting.figure(y_range=names[::-1], **kwargs)


    
    for conf, name, col, cen in zip(confs, names, fill_color, centers):
        p.line(x=conf, y=[name, name], line_width=2,line_color = col )
        p.circle(x=cen, y = [name], color = col, size =8)

    return p



def model_df_hier(df):
    df['N'] = df['Total']
    df['n'] = df['Yes'].values
   
    #df['K'] = 

    return df



# now create dictionaries for model parameters 
def get_params_h(df, K = 3, kappa_sig = 10.):
    params = {}
    for genotype in df['Genotype'].unique():

        df_gene = df.loc[df['Genotype'] == genotype, :].reset_index()
        params[genotype] = [len(df_gene), df_gene['N'].values, df_gene['n'].values, 1., 1., kappa_sig]
    return params
    

def model_run_h(params_list, model, low_noise = False, near_zero = False):
    K, N, n, theta_a, theta_b, kappa_sig = params_list

    if low_noise: 
        kappa_sig = 100. 
  

    
    info_dict = {'N': N.astype(int), 'n': n.astype(int), 'alpha_phi': float(theta_a), 
                 'beta_phi': theta_b, 'K': int(K), 'kappa_sig': float(kappa_sig)}
    print(info_dict)
    samples = model.sampling(data=info_dict)
    print(bebi103.stan.check_all_diagnostics(samples))
    return samples  

def plot_params(samples_dic, param = 'phi'):
    '''Plots Posterior Samples from Stan Models for Different strains '''
    ecdfs = []
    hists = []
    for genotype in samples_dic:
        Title = 'Strain {}, Posterior Samples'.format(genotype)
        samples = samples_dic[genotype][param]
        hists.append(bebi103.viz.histogram(samples, title=Title, plot_height = 150, plot_width = 250))
        ecdfs.append(bebi103.viz.ecdf(samples, title=Title, plot_height = 150, plot_width = 250))
    for ecdf in ecdfs[1:]:
        ecdf.x_range, ecdf.y_range = ecdfs[0].x_range, ecdfs[0].y_range
    
    for hist in hists[1:]:
        hist.x_range, hist.y_range = hists[0].x_range, hists[0].y_range
    
    bokeh.io.show(row(column(*hists), column(*ecdfs)))



## Hierarchical Model 

Likelihood:

$N_{hernia} \sim \text{Binomial}(N, p)$

Where $p$ is the probability of hernia, $N$ is the number of worms. 

Prior:

Level 0: $\phi \sim \text{Beta}(\alpha_p, \beta_p)$

$\kappa \sim \text{HalfNorm}(0,30)$

$\alpha = \phi \kappa$

$\beta = (1-\phi) \kappa$



Level 1: $\theta_i \sim \text{Beta}(\alpha, \beta)$

Measured data: $n_i \sim \text{Binomial}(N_i, \theta_i)$

Note that for all models $\alpha_p = \beta_p = 1$ so that the prior is uniform between 0 and 1. 




In [9]:
model_code = """
data {
  // Number of separate experiments
  int K;
  
  int N[K];
  int n[K];
  
  real alpha_phi;
  real beta_phi;
  real kappa_sig;
}


parameters {
  // Hyperparameters
  real<lower=0, upper=1> phi;
  real<lower=0> kappa;

  // Parameters
  real<lower=0, upper=1> theta[K];
}


transformed parameters {
  // Transformed hyperparameters
  real<lower=0> alpha = phi * kappa;
  real<lower=0> beta_ = (1 - phi) * kappa;
}


model {
  // Hyperpriors
  phi ~ beta(alpha_phi, beta_phi);
  kappa ~ normal(0, kappa_sig);
  
  // Prior
  theta ~ beta(alpha, beta_);
  
  // Likelihood
  n ~ binomial(N, theta);
}
"""

sm_h = bebi103.stan.StanModel(model_code=model_code)

Using cached StanModel.


In [10]:
# create dataframe with parameters for stan model
df_mut_model = model_df_hier(df_mut.loc[df_mut['Heat Shock'] == 'After', :])
df_resc_model = model_df_hier(df_resc.loc[df_resc['Heat Shock'] == 'After'])

df_full = pd.concat([df_mut_model, df_resc_model])
# get dictionary of parameters for each strain

all_params = get_params_h(df_full)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [11]:
samples_mut = {}
for genotype in df_mut_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = False)
    samples_mut[genotype] = samples_b

    print('')

Genotype:  N2
{'N': array([20, 26, 24, 23, 22, 26, 24, 30]), 'n': array([19, 26, 23, 23, 21, 26, 24, 30]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  +
{'N': array([24, 24, 24, 23, 25, 25, 32, 30]), 'n': array([0, 0, 0, 0, 0, 0, 0, 0]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  hsf-1(sy1198)
{'N': array([23, 26, 30, 24]), 'n': array([21, 26, 30, 23]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}
n_eff

In [12]:
plot_params(samples_mut)

In [13]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = df_mut_model['Genotype'].unique()
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.961, 95% Credible Region = [0.868, 0.989]

Genotype:  +
--------------------
Median = 0.021, 95% Credible Region = [0.004, 0.123]

Genotype:  hsf-1(sy1198)
--------------------
Median = 0.932, 95% Credible Region = [0.739, 0.985]

Genotype:  hsf-1(sy441)
--------------------
Median = 0.967, 95% Credible Region = [0.758, 0.995]

Genotype:  affl-2(sy975)
--------------------
Median = 0.955, 95% Credible Region = [0.867, 0.987]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 0.945, 95% Credible Region = [0.725, 0.990]

Genotype:  affl-1(sy1202)
--------------------
Median = 0.033, 95% Credible Region = [0.005, 0.234]



In [14]:
palette_mut = [bokeh.palettes.Colorblind[6][1]]*2 + [bokeh.palettes.Colorblind[6][3]]*2+\
                       bokeh.palettes.Colorblind[7][4:7]
p = plot_with_error_bars(
    cent, cred, genotypes, palette_mut,
)
p.x_range = Range1d(0.0, 1.0)

bokeh.io.show(p)

In [15]:
samples_resc = {}
for genotype in df_resc_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = True)
    samples_resc[genotype] = samples_b

    print('')

Genotype:  AFFL-2 Del+NLS::GFP
{'N': array([18, 25, 23, 23]), 'n': array([4, 3, 4, 5]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  affl-2(sy975)
{'N': array([24, 28, 30, 22, 25, 28, 32, 24]), 'n': array([24, 28, 30, 21, 24, 28, 28, 24]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2::GFP
{'N': array([15, 28, 27, 19]), 'n': array([0, 0, 0, 0]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}




n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
1 of 4000 (0.025%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
4

Genotype:  N2
{'N': array([20, 26, 24, 23, 22, 26, 24, 30]), 'n': array([19, 26, 23, 23, 21, 26, 24, 30]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2 Del::GFP
{'N': array([22, 23, 23, 21]), 'n': array([18, 12, 12, 12]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4

In [16]:
plot_params(samples_resc)

In [17]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = df_resc_model['Genotype'].unique()
genotypes = ['N2','+',  'affl-2(sy975)','AFFL-2::GFP',
       'AFFL-2 Del::GFP', 'AFFL-2 Del+NLS::GFP',  'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP']
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.978, 95% Credible Region = [0.944, 0.994]

Genotype:  +
--------------------
Median = 0.006, 95% Credible Region = [0.001, 0.038]

Genotype:  affl-2(sy975)
--------------------
Median = 0.969, 95% Credible Region = [0.922, 0.990]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.013, 95% Credible Region = [0.001, 0.113]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.607, 95% Credible Region = [0.470, 0.720]

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.190, 95% Credible Region = [0.111, 0.293]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.210, 95% Credible Region = [0.119, 0.352]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.485, 95% Credible Region = [0.336, 0.629]



In [20]:
palette_resc = [bokeh.palettes.Colorblind[6][1]]*2  + \
bokeh.palettes.Colorblind[8][4:8]+ bokeh.palettes.Colorblind[6][2:4]
p = plot_with_error_bars(
    cent, cred, genotypes,  palette_resc
)


p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

## Before Heat Shock

In [21]:
# create dataframe with parameters for stan model
df_mut_model_before = model_df_hier(df_mut.loc[df_mut['Heat Shock'] == 'Before', :])
df_resc_model_before = model_df_hier(df_resc.loc[df_resc['Heat Shock'] == 'Before'])

df_full_before = pd.concat([df_mut_model_before, df_resc_model_before])
# get dictionary of parameters for each strain


all_params = get_params_h(df_full_before)

print(all_params)

{'N2': [8, array([20, 21, 27, 25, 19, 23, 19, 21]), array([20, 20, 26, 25, 19, 23, 19, 20]), 1.0, 1.0, 10.0], '+': [8, array([19, 20, 22, 22, 21, 20, 25, 23]), array([19, 20, 22, 22, 21, 20, 25, 23]), 1.0, 1.0, 10.0], 'hsf-1(sy1198)': [4, array([20, 22, 26, 25]), array([20, 22, 25, 25]), 1.0, 1.0, 10.0], 'hsf-1(sy441)': [4, array([17, 17, 19, 23]), array([17, 17, 18, 23]), 1.0, 1.0, 10.0], 'affl-2(sy975)': [8, array([24, 23, 26, 27, 20, 26, 22, 22]), array([24, 23, 25, 27, 20, 26, 22, 22]), 1.0, 1.0, 10.0], 'affl-1(sy1220) affl-2(sy975)': [4, array([18, 20, 29, 26]), array([17, 20, 29, 26]), 1.0, 1.0, 10.0], 'affl-1(sy1202)': [4, array([23, 21, 22, 26]), array([23, 21, 22, 25]), 1.0, 1.0, 10.0], 'AFFL-2 Del+NLS::GFP': [4, array([17, 30, 19, 21]), array([17, 30, 19, 21]), 1.0, 1.0, 10.0], 'AFFL-2::GFP': [4, array([20, 21, 26, 18]), array([19, 21, 26, 18]), 1.0, 1.0, 10.0], 'AFFL-2 Del::GFP': [4, array([19, 21, 16, 20]), array([19, 21, 16, 20]), 1.0, 1.0, 10.0], 'AFFL-2 FUSLC+NLS::GFP': 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [22]:
samples_mut = {}
for genotype in df_mut_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h)
    samples_mut[genotype] = samples_b

    print('')

Genotype:  N2
{'N': array([20, 21, 27, 25, 19, 23, 19, 21]), 'n': array([20, 20, 26, 25, 19, 23, 19, 20]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  +
{'N': array([19, 20, 22, 22, 21, 20, 25, 23]), 'n': array([19, 20, 22, 22, 21, 20, 25, 23]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  hsf-1(sy1198)
{'N': array([20, 22, 26, 25]), 'n': array([20, 22, 25, 25]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.

In [23]:
plot_params(samples_mut)

In [24]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = df_mut_model['Genotype'].unique()
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.959, 95% Credible Region = [0.877, 0.988]

Genotype:  +
--------------------
Median = 0.979, 95% Credible Region = [0.860, 0.995]

Genotype:  hsf-1(sy1198)
--------------------
Median = 0.955, 95% Credible Region = [0.755, 0.992]

Genotype:  hsf-1(sy441)
--------------------
Median = 0.950, 95% Credible Region = [0.765, 0.991]

Genotype:  affl-2(sy975)
--------------------
Median = 0.974, 95% Credible Region = [0.865, 0.994]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 0.953, 95% Credible Region = [0.756, 0.992]

Genotype:  affl-1(sy1202)
--------------------
Median = 0.952, 95% Credible Region = [0.770, 0.992]



In [25]:
palette_mut = [bokeh.palettes.Colorblind[6][1]]*2 + [bokeh.palettes.Colorblind[6][3]]*2+\
                       bokeh.palettes.Colorblind[7][4:7]
p = plot_with_error_bars(
    cent, cred, genotypes, palette_mut,
)
p.x_range = Range1d(0.0, 1.0)

bokeh.io.show(p)

In [26]:
samples_resc = {}
for genotype in df_resc_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = False)
    samples_resc[genotype] = samples_b

    print('')

Genotype:  AFFL-2 Del+NLS::GFP
{'N': array([17, 30, 19, 21]), 'n': array([17, 30, 19, 21]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  affl-2(sy975)
{'N': array([24, 23, 26, 27, 20, 26, 22, 22]), 'n': array([24, 23, 25, 27, 20, 26, 22, 22]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2::GFP
{'N': array([20, 21, 26, 18]), 'n': array([19, 21, 26, 18]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}
n_

In [27]:
plot_params(samples_resc)

In [28]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = df_resc_model['Genotype'].unique()
genotypes = ['N2','+',  'affl-2(sy975)','AFFL-2::GFP',
       'AFFL-2 Del::GFP', 'AFFL-2 Del+NLS::GFP',  'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP']
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()
    

Genotype:  N2
--------------------
Median = 0.959, 95% Credible Region = [0.861, 0.988]

Genotype:  +
--------------------
Median = 0.979, 95% Credible Region = [0.886, 0.995]

Genotype:  affl-2(sy975)
--------------------
Median = 0.974, 95% Credible Region = [0.888, 0.994]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.951, 95% Credible Region = [0.738, 0.992]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.963, 95% Credible Region = [0.758, 0.995]

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.968, 95% Credible Region = [0.760, 0.995]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.965, 95% Credible Region = [0.775, 0.995]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.952, 95% Credible Region = [0.754, 0.991]



In [30]:
p = plot_with_error_bars(
    cent, cred, genotypes,  palette_resc
)

p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

In [None]:
%load_ext watermark
%watermark -v -p numpy,bokeh,bebi103,pandas,altair,jupyterlab