In [1]:
import numpy as np
import pandas as pd

import bebi103

import altair as alt
import altair_catplot as altcat

import bokeh.io
import bokeh.plotting
from bokeh.layouts import row, column
from bokeh.models import Range1d
import bokeh.application
import bokeh.application.handlers
bokeh.io.output_notebook()

Features requiring DataShader will not work and you will get exceptions.
  Features requiring DataShader will not work and you will get exceptions.""")


# Simple File Processing 

I had special names for all of my genotypes that I used to record data. For the purposes of plotting and for others understanding the data I renamed everything and made new dataframes.  


Implicit in all strains except N2 is syIs231 (which is hs:lin-3c). 

Implicit in all rescue strains except N2 and "+" is affl-2(sy975). 


**You may need to alter folders for files depending on your setup**

# Making Plots


First we load in our data.

In [2]:
# load data 

df_mut = pd.read_csv('data/mutant_lin3c.csv')
df_mut['Total'] = df_mut['Yes'].values +  df_mut['No'].values
print(df_mut['Genotype'].unique())

df_resc = pd.read_csv('data/rescue_lin3c.csv')
df_resc['Total'] = df_resc['Yes'].values +  df_resc['No'].values
print(df_resc['Genotype'].unique())


['N2' '+' 'hsf-1(sy1198)' 'hsf-1(sy441)' 'affl-2(sy975)'
 'affl-1(sy1220) affl-2(sy975)' 'affl-1(sy1202)']
['AFFL-2 Del+NLS::GFP' 'affl-2(sy975)' 'AFFL-2::GFP' 'N2'
 'AFFL-2 Del::GFP' '+' 'AFFL-2 FUSLC+NLS::GFP' 'AFFL-2 FUSLC*+NLS::GFP']


We define an order for the mutant strains and plot the results. 

In [3]:
order_mut = [('N2', 'Before'),
 ('N2', 'After'),
 ('+', 'Before'),
 ('+', 'After'),
 ('hsf-1(sy1198)', 'Before'),
 ('hsf-1(sy1198)', 'After'),
 ('hsf-1(sy441)', 'Before'),
 ('hsf-1(sy441)', 'After'),
 ('affl-2(sy975)', 'Before'),
 ('affl-2(sy975)', 'After'),
 ('affl-1(sy1202)', 'Before'),
 ('affl-1(sy1202)', 'After'),
 ('affl-1(sy1220) affl-2(sy975)', 'Before'),
 ('affl-1(sy1220) affl-2(sy975)', 'After')]

In [4]:

p = bebi103.viz.jitter(df_mut,
                       ['Genotype', 'Heat Shock'],
                       'Fraction Pumping',
                       horizontal=False,
                       plot_width=600,
                       plot_height = 300,
                       #palette=pp[::2],
                       line_color='black',
                       size = 7,
                     
                       
                       y_axis_label=None,
                       color_column = 'Heat Shock',
                       order = order_mut,
                       palette = [bokeh.palettes.Colorblind[3][1], 
                                  bokeh.palettes.Colorblind[3][0]],
                       
                       
                      # color = pp[0],
                     
                       x_axis_label = 'Genotype',
                      )
p.xgrid.grid_line_color = None

p.y_range = Range1d(-.05,1.1)
p.yaxis.axis_label_text_font_size = '8pt'
p.yaxis.axis_line_width = 2
p.xaxis.axis_line_width = 2

p.xaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks

bokeh.io.show(p)

We define an order for the AFFL-2 rescue strains and plot the results. 

In [5]:
df_resc['Genotype'].unique()

array(['AFFL-2 Del+NLS::GFP', 'affl-2(sy975)', 'AFFL-2::GFP', 'N2',
       'AFFL-2 Del::GFP', '+', 'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP'], dtype=object)

In [6]:
order_resc = [
 ('N2', 'Before'),
 ('N2', 'After'),

 ('+', 'Before'),
 ('+', 'After'),
  ('affl-2(sy975)', 'Before'),
 ('affl-2(sy975)', 'After'),
 ('AFFL-2::GFP', 'Before'),
 ('AFFL-2::GFP', 'After'),
  ('AFFL-2 Del::GFP', 'Before'),
 ('AFFL-2 Del::GFP', 'After'),
('AFFL-2 Del+NLS::GFP', 'Before'),
 ('AFFL-2 Del+NLS::GFP', 'After'),
 
 ('AFFL-2 FUSLC+NLS::GFP', 'Before'),
 ('AFFL-2 FUSLC+NLS::GFP', 'After'),
 ('AFFL-2 FUSLC*+NLS::GFP', 'Before'),
 ('AFFL-2 FUSLC*+NLS::GFP', 'After')]

In [7]:

p = bebi103.viz.jitter(df_resc,
                       ['Genotype', 'Heat Shock'],
                       'Fraction Pumping',
                       horizontal=False,
                       plot_width=600,
                       plot_height = 300,
                       #palette=pp[::2],
                       
                       color_column = 'Heat Shock',
                       line_color = 'black',
                       size = 7,
                       order = order_resc,
                       palette = [bokeh.palettes.Colorblind[3][1], 
                                  bokeh.palettes.Colorblind[3][0]],
                       
                       
                      # color = pp[0],
                       #show_legend = True,
                     
                       x_axis_label = 'Genotype',
                      )
p.xgrid.grid_line_color = None

p.y_range = Range1d(-.05,1.1)
p.yaxis.axis_label_text_font_size = '8pt'
p.yaxis.axis_line_width = 2
p.xaxis.axis_line_width = 2

p.xaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.yaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off y-axis minor ticks

bokeh.io.show(p)

As you can see, the labels are very messy. Therefore, we used powerpoint to create our own custom labels. 

In [8]:
model_code = """
data {
  // Measured data
  int N;
  int n;
  
  // Prior parameters for theta
  real alpha_theta;
  real beta_theta;
}


parameters {
  real<lower=0, upper=1> theta;
}

model {
  theta ~ beta(alpha_theta, beta_theta);
  n ~ binomial(N, theta);
}
"""

sm = bebi103.stan.StanModel(model_code=model_code)

Using cached StanModel.


In [9]:
# now create dictionaries for model parameters 
def plot_with_error_bars(
    centers, confs, names, fill_color, line_kwargs={}, **kwargs
):
    """Make a horizontal plot of centers/conf ints with error bars.
    Parameters
    ----------
    centers : array_like, shape (n,)
        Array of center points for error bar plot.
    confs : array_like, shape (n, 2)
        Array of low and high values of confidence intervals
    names : list of strings
        Names of the variables for the plot. These give the y-ticks.
    marker_kwargs : dict, default {}
        Kwargs to be passed to p.circle() for plotting centers.
    line_kwargs : dict, default {}
        Kwargs passsed to p.line() to plot the confidence interval.
    kwargs : dict
        Any addition kwargs are passed to bokeh.plotting.figure().
    Returns
    -------
    output : Bokeh figure
        Plot of error bars.
    """
    n = len(names)
    if len(centers) != n:
        raise ValueError("len(centers) ≠ len(names)")
    if confs.shape != (n, 2):
        raise ValueError("Shape of `confs` must be (len(names), 2).")

    if "plot_height" not in kwargs and "frame_height" not in kwargs:
        kwargs["frame_height"] = 50 * n
    if "plot_width" not in kwargs and "frame_width" not in kwargs:
        kwargs["frame_width"] = 450
    line_width = kwargs.pop("line_width", 2)

    p = bokeh.plotting.figure(y_range=names[::-1], **kwargs)


    
    for conf, name, col, cen in zip(confs, names, fill_color, centers):
        p.line(x=conf, y=[name, name], line_width=2,line_color = col )
        p.circle(x=cen, y = [name], color = col, size =8)

    return p

def get_params(df, sleeping = [], somewhat_sleeping = [], somewhat_not_sleeping = []):
    '''Creates dictionaries of stan model parameters for different strains'''
    params = {}
    for genotype in df['Genotype'].unique():
        df_gene = df.loc[df['Genotype'] == genotype, :].reset_index()
        params[genotype] = [int(np.sum(df_gene['N'].values)), int(np.sum(df_gene['n'].values)), 1, 1]
    return params
    

def model_run(params_list, model):
    '''Runs stan model and prints diagnostics'''
    N, n, theta_a, theta_b = params_list
    info_dict = {'N': N, 'n': n, 'alpha_theta': theta_a, 'beta_theta': theta_b}
    samples = model.sampling(data=info_dict)
    print(bebi103.stan.check_all_diagnostics(samples))
    return samples  


def model_df_hier(df):
    df['N'] = df['Total']
    df['n'] = df['Yes'].values
   
    #df['K'] = 

    return df



def plot_params(samples_dic, param = 'phi'):
    '''Plots Posterior Samples from Stan Models for Different strains '''
    ecdfs = []
    hists = []
    for genotype in samples_dic:
        Title = 'Strain {}, Posterior Samples'.format(genotype)
        samples = samples_dic[genotype][param]
        hists.append(bebi103.viz.histogram(samples, title=Title, plot_height = 150, plot_width = 250))
        ecdfs.append(bebi103.viz.ecdf(samples, title=Title, plot_height = 150, plot_width = 250))
    for ecdf in ecdfs[1:]:
        ecdf.x_range, ecdf.y_range = ecdfs[0].x_range, ecdfs[0].y_range
    
    for hist in hists[1:]:
        hist.x_range, hist.y_range = hists[0].x_range, hists[0].y_range
    
    bokeh.io.show(row(column(*hists), column(*ecdfs)))

In [10]:
df_mut_after = df_mut.loc[df_mut['Heat Shock'] == 'After', :]
df_mut_model = model_df_hier(df_mut_after)
mut_params = get_params(df_mut_model)
samples_mut = {}
phi_samples = {}
for genotype in df_mut_model['Genotype'].unique():
    print('Genotype: ', genotype)

    
    samples_b = model_run(mut_params[genotype], sm)
    print(mut_params[genotype])
    samples_mut[genotype] = samples_b
    phi_samples[genotype] = samples_b['theta']

    print('')

Genotype:  N2


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[93, 91, 1, 1]

Genotype:  +
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[95, 0, 1, 1]

Genotype:  hsf-1(sy1198)
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[103, 100, 1, 1]

Genotype:  hsf-1(sy441)
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterat

In [11]:
plot_params(samples_mut, param = 'theta')

In [12]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = []
for i, genotype in enumerate(df_mut_model['Genotype'].unique()):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    genotypes.append(genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['theta'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.972, 95% Credible Region = [0.926, 0.994]

Genotype:  +
--------------------
Median = 0.007, 95% Credible Region = [0.000, 0.037]

Genotype:  hsf-1(sy1198)
--------------------
Median = 0.964, 95% Credible Region = [0.914, 0.989]

Genotype:  hsf-1(sy441)
--------------------
Median = 0.991, 95% Credible Region = [0.956, 1.000]

Genotype:  affl-2(sy975)
--------------------
Median = 0.984, 95% Credible Region = [0.948, 0.998]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 0.972, 95% Credible Region = [0.929, 0.993]

Genotype:  affl-1(sy1202)
--------------------
Median = 0.007, 95% Credible Region = [0.000, 0.034]



In [13]:
palette_mut = [bokeh.palettes.Colorblind[6][1]]*2 + [bokeh.palettes.Colorblind[6][3]]*2+\
                       bokeh.palettes.Colorblind[7][4:7]

In [14]:

p = plot_with_error_bars(
    cent, cred, genotypes,  palette_mut
)


p.yaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.xaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

In [15]:
df_resc_after = df_resc.loc[df_resc['Heat Shock'] == 'After', :]
df_resc_model = model_df_hier(df_resc_after)

resc_params = get_params(df_resc_model)

print(resc_params)
resc_params = get_params(df_resc_model)
samples_resc = {}
theta_samples = {}
genotypes = []
for genotype in df_resc_model['Genotype'].unique():
    print('Genotype: ', genotype)
    genotypes.append(genotype)

    samples_b = model_run(resc_params[genotype], sm)
    print(resc_params[genotype])
    samples_resc[genotype] = samples_b
    theta_samples[genotype] = samples_b['theta']

    print('')

{'AFFL-2 Del+NLS::GFP': [89, 16, 1, 1], 'affl-2(sy975)': [109, 104, 1, 1], 'AFFL-2::GFP': [89, 0, 1, 1], 'N2': [102, 101, 1, 1], 'AFFL-2 Del::GFP': [89, 54, 1, 1], '+': [112, 0, 1, 1], 'AFFL-2 FUSLC+NLS::GFP': [96, 19, 1, 1], 'AFFL-2 FUSLC*+NLS::GFP': [88, 43, 1, 1]}
Genotype:  AFFL-2 Del+NLS::GFP


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[89, 16, 1, 1]

Genotype:  affl-2(sy975)
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[109, 104, 1, 1]

Genotype:  AFFL-2::GFP
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0
[89, 0, 1, 1]

Genotype:  N2
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterat

In [16]:
plot_params(samples_resc, param = 'theta')

In [17]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = []
for i, genotype in enumerate(df_resc_model['Genotype'].unique()):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    genotypes.append(genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['theta'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.186, 95% Credible Region = [0.115, 0.278]

Genotype:  affl-2(sy975)
--------------------
Median = 0.949, 95% Credible Region = [0.897, 0.980]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.008, 95% Credible Region = [0.000, 0.039]

Genotype:  N2
--------------------
Median = 0.984, 95% Credible Region = [0.950, 0.997]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.603, 95% Credible Region = [0.504, 0.699]

Genotype:  +
--------------------
Median = 0.007, 95% Credible Region = [0.000, 0.033]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.203, 95% Credible Region = [0.132, 0.286]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.489, 95% Credible Region = [0.387, 0.590]



In [18]:
palette_resc = [bokeh.palettes.Colorblind[6][1]]*2  +  \
bokeh.palettes.Colorblind[8][4:8]+ bokeh.palettes.Colorblind[6][2:4]
p = plot_with_error_bars(
    cent, cred, genotypes,  palette_resc
)


p.yaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off x-axis minor ticks


p.xaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

# bayesian analysis 

## Hierarchical 

Likelihood:

$N_{hernia} \sim \text{Binomial}(N, p)$

Where $p$ is the probability of hernia, $N$ is the number of worms. 

Prior:

Level 0: $\phi \sim \text{Beta}(\alpha_p, \beta_p)$

$\kappa \sim \text{HalfNorm}(0,10)$


$\alpha = \phi \kappa$

$\beta = (1-\phi) \kappa$



Level 1: $\theta_i \sim \text{Beta}(\alpha, \beta)$

Measured data: $n_i \sim \text{Binomial}(N_i, \theta_i)$





In [19]:
model_code = """
data {
  // Number of separate experiments
  int K;
  
  int N[K];
  int n[K];
  
  real alpha_phi;
  real beta_phi;
  real kappa_sig;
}


parameters {
  // Hyperparameters
  real<lower=0, upper=1> phi;
  real<lower=0> kappa;

  // Parameters
  real<lower=0, upper=1> theta[K];
}


transformed parameters {
  // Transformed hyperparameters
  real<lower=0> alpha = phi * kappa;
  real<lower=0> beta_ = (1 - phi) * kappa;
}


model {
  // Hyperpriors
  phi ~ beta(alpha_phi, beta_phi);
  kappa ~ normal(0, kappa_sig);
  
  // Prior
  theta ~ beta(alpha, beta_);
  
  // Likelihood
  n ~ binomial(N, theta);
}
"""

sm_h = bebi103.stan.StanModel(model_code=model_code)

Using cached StanModel.


In [20]:
def model_df_hier(df):
    df['N'] = df['Total']
    df['n'] = df['Yes'].values
   
    #df['K'] = 

    return df

In [21]:
# now create dictionaries for model parameters 
def get_params_h(df, K = 3, kappa_sig = 10.,
                near_zero = [], near_one = [], mid = []):
    params = {}
    for genotype in df['Genotype'].unique():

        df_gene = df.loc[df['Genotype'] == genotype, :].reset_index()
        if genotype in near_zero:
            params[genotype] = [len(df_gene), df_gene['N'].values, df_gene['n'].values, 1., 1.7, kappa_sig]
        elif genotype in mid:
     
            params[genotype] = [len(df_gene), df_gene['N'].values, df_gene['n'].values, 1.36, 1.43, kappa_sig]
        elif genotype in near_one:
            params[genotype] = [len(df_gene), df_gene['N'].values, df_gene['n'].values, 1.66, 1., kappa_sig]
        else: 
            params[genotype] = [len(df_gene), df_gene['N'].values, df_gene['n'].values, 1., 1., kappa_sig]
    return params
    

def model_run_h(params_list, model, low_noise = False, near_zero = False):
    K, N, n, theta_a, theta_b, kappa_sig = params_list

    if low_noise: 
        kappa_sig = 100. 
  

    
    info_dict = {'N': N.astype(int), 'n': n.astype(int), 'alpha_phi': float(theta_a), 
                 'beta_phi': theta_b, 'K': int(K), 'kappa_sig': float(kappa_sig)}
    print(info_dict)
    samples = model.sampling(data=info_dict)
    print(bebi103.stan.check_all_diagnostics(samples))
    return samples  



In [43]:
# create dataframe with parameters for stan model
df_mut_model = model_df_hier(df_mut.loc[df_mut['Heat Shock'] == 'After', :])
df_resc_model = model_df_hier(df_resc.loc[df_resc['Heat Shock'] == 'After'])

df_full = pd.concat([df_mut_model, df_resc_model])
# get dictionary of parameters for each strain
near_zero = ['+',  'affl-1(sy1202)', 'AFFL-2::GFP']
mid = ['AFFL-2 Del+NLS::GFP', 'AFFL-2 FUSLC*+NLS::GFP', 'AFFL-2 FUSLC+NLS::GFP', 'AFFL-2 Del::GFP']
near_one = ['N2', 'affl-2(sy975)', 
             'affl-1(sy1220) affl-2(sy975)', 'hsf-1(sy1198)'
            'hsf-1(sy441)']

all_params = get_params_h(df_full, near_zero = near_zero, 
                          near_one = near_one, mid = mid)

print(all_params)

{'N2': [8, array([20, 26, 24, 23, 22, 26, 24, 30]), array([19, 26, 23, 23, 21, 26, 24, 30]), 1.66, 1.0, 10.0], '+': [8, array([24, 24, 24, 23, 25, 25, 32, 30]), array([0, 0, 0, 0, 0, 0, 0, 0]), 1.0, 1.7, 10.0], 'hsf-1(sy1198)': [4, array([23, 26, 30, 24]), array([21, 26, 30, 23]), 1.0, 1.0, 10.0], 'hsf-1(sy441)': [4, array([21, 20, 21, 21]), array([21, 20, 21, 21]), 1.0, 1.0, 10.0], 'affl-2(sy975)': [8, array([24, 28, 30, 22, 25, 28, 32, 24]), array([24, 28, 30, 21, 24, 28, 28, 24]), 1.66, 1.0, 10.0], 'affl-1(sy1220) affl-2(sy975)': [4, array([21, 27, 29, 22]), array([21, 27, 29, 20]), 1.66, 1.0, 10.0], 'affl-1(sy1202)': [4, array([23, 28, 24, 26]), array([0, 0, 0, 0]), 1.0, 1.7, 10.0], 'AFFL-2 Del+NLS::GFP': [4, array([18, 25, 23, 23]), array([4, 3, 4, 5]), 1.36, 1.43, 10.0], 'AFFL-2::GFP': [4, array([15, 28, 27, 19]), array([0, 0, 0, 0]), 1.0, 1.7, 10.0], 'AFFL-2 Del::GFP': [4, array([22, 23, 23, 21]), array([18, 12, 12, 12]), 1.36, 1.43, 10.0], 'AFFL-2 FUSLC+NLS::GFP': [4, array([19

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [51]:
samples_mut = {}
for genotype in df_mut_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = False)
    samples_mut[genotype] = samples_b

    print('')

Genotype:  N2
{'N': array([20, 26, 24, 23, 22, 26, 24, 30]), 'n': array([19, 26, 23, 23, 21, 26, 24, 30]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  +
{'N': array([24, 24, 24, 23, 25, 25, 32, 30]), 'n': array([0, 0, 0, 0, 0, 0, 0, 0]), 'alpha_phi': 1.0, 'beta_phi': 1.7, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  hsf-1(sy1198)
{'N': array([23, 26, 30, 24]), 'n': array([21, 26, 30, 23]), 'alpha_phi': 1.0, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}
n_ef

In [53]:
plot_params(samples_mut, param = 'kappa')

In [52]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = df_mut_model['Genotype'].unique()
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['kappa'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 9.570, 95% Credible Region = [1.552, 24.733]

Genotype:  +
--------------------
Median = 6.953, 95% Credible Region = [0.770, 22.796]

Genotype:  hsf-1(sy1198)
--------------------
Median = 7.305, 95% Credible Region = [0.767, 22.677]

Genotype:  hsf-1(sy441)
--------------------
Median = 5.556, 95% Credible Region = [0.359, 20.650]

Genotype:  affl-2(sy975)
--------------------
Median = 8.261, 95% Credible Region = [1.270, 22.441]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 5.511, 95% Credible Region = [0.550, 20.543]

Genotype:  affl-1(sy1202)
--------------------
Median = 5.254, 95% Credible Region = [0.361, 20.810]



In [46]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = df_mut_model['Genotype'].unique()
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.961, 95% Credible Region = [0.869, 0.989]

Genotype:  +
--------------------
Median = 0.021, 95% Credible Region = [0.004, 0.117]

Genotype:  hsf-1(sy1198)
--------------------
Median = 0.934, 95% Credible Region = [0.756, 0.984]

Genotype:  hsf-1(sy441)
--------------------
Median = 0.964, 95% Credible Region = [0.732, 0.995]

Genotype:  affl-2(sy975)
--------------------
Median = 0.955, 95% Credible Region = [0.857, 0.986]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 0.946, 95% Credible Region = [0.745, 0.990]

Genotype:  affl-1(sy1202)
--------------------
Median = 0.031, 95% Credible Region = [0.005, 0.221]



In [26]:
palette_mut = [bokeh.palettes.Colorblind[6][1]]*2 + [bokeh.palettes.Colorblind[6][3]]*2+\
                       bokeh.palettes.Colorblind[7][4:7]
p = plot_with_error_bars(
    cent, cred, genotypes, palette_mut,
)
p.x_range = Range1d(0.0, 1.0)

bokeh.io.show(p)

In [41]:
samples_resc = {}
for genotype in df_resc_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = True)
    samples_resc[genotype] = samples_b

    print('')

Genotype:  AFFL-2 Del+NLS::GFP
{'N': array([17, 30, 19, 21]), 'n': array([17, 30, 19, 21]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}




n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
2 of 4000 (0.05%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
4

Genotype:  affl-2(sy975)
{'N': array([24, 23, 26, 27, 20, 26, 22, 22]), 'n': array([24, 23, 25, 27, 20, 26, 22, 22]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2::GFP
{'N': array([20, 21, 26, 18]), 'n': array([19, 21, 26, 18]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}




n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
9 of 4000 (0.225%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
4

Genotype:  N2
{'N': array([20, 21, 27, 25, 19, 23, 19, 21]), 'n': array([20, 20, 26, 25, 19, 23, 19, 20]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2 Del::GFP
{'N': array([19, 21, 16, 20]), 'n': array([19, 21, 16, 20]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}




n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
4 of 4000 (0.1%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
4

Genotype:  +
{'N': array([19, 20, 22, 22, 21, 20, 25, 23]), 'n': array([19, 20, 22, 22, 21, 20, 25, 23]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2 FUSLC+NLS::GFP
{'N': array([20, 22, 21, 17]), 'n': array([20, 22, 21, 17]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 100.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0



n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
4 of 4000 (0.1%) iterations ended with a divergence.
  Try running with larger adapt_delta to remove divergences.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
4



In [28]:
plot_params(samples_resc)

In [42]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = df_resc_model['Genotype'].unique()
genotypes = ['N2','+',  'affl-2(sy975)','AFFL-2::GFP',
       'AFFL-2 Del::GFP', 'AFFL-2 Del+NLS::GFP',  'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP']
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.976, 95% Credible Region = [0.939, 0.993]

Genotype:  +
--------------------
Median = 0.994, 95% Credible Region = [0.961, 0.999]

Genotype:  affl-2(sy975)
--------------------
Median = 0.989, 95% Credible Region = [0.958, 0.998]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.975, 95% Credible Region = [0.891, 0.996]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.986, 95% Credible Region = [0.896, 0.999]

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.988, 95% Credible Region = [0.914, 0.999]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.986, 95% Credible Region = [0.917, 0.999]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.977, 95% Credible Region = [0.907, 0.996]



In [29]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = df_resc_model['Genotype'].unique()
genotypes = ['N2','+',  'affl-2(sy975)','AFFL-2::GFP',
       'AFFL-2 Del::GFP', 'AFFL-2 Del+NLS::GFP',  'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP']
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()
    

Genotype:  N2
--------------------
Median = 0.961, 95% Credible Region = [0.879, 0.988]

Genotype:  +
--------------------
Median = 0.019, 95% Credible Region = [0.004, 0.119]

Genotype:  affl-2(sy975)
--------------------
Median = 0.955, 95% Credible Region = [0.860, 0.987]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.034, 95% Credible Region = [0.005, 0.213]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.593, 95% Credible Region = [0.411, 0.758]

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.220, 95% Credible Region = [0.108, 0.390]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.239, 95% Credible Region = [0.112, 0.431]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.483, 95% Credible Region = [0.295, 0.670]



In [30]:
p = plot_with_error_bars(
    cent, cred, genotypes,  palette_resc
)


p.yaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.xaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

In [31]:
# create dataframe with parameters for stan model
df_mut_model_before = model_df_hier(df_mut.loc[df_mut['Heat Shock'] == 'Before', :])
df_resc_model_before = model_df_hier(df_resc.loc[df_resc['Heat Shock'] == 'Before'])

df_full_before = pd.concat([df_mut_model_before, df_resc_model_before])
# get dictionary of parameters for each strain

near_one_before = ['N2', 'affl-2(sy975)', '+',  'affl-1(sy1202)', 'AFFL-2::GFP',
             'affl-1(sy1220) affl-2(sy975)', 'hsf-1(sy1198)', 'hsf-1(sy441)', 
           'AFFL-2 Del+NLS::GFP', 'AFFL-2 FUSLC*+NLS::GFP', 'AFFL-2 FUSLC+NLS::GFP', 'AFFL-2 Del::GFP']

all_params = get_params_h(df_full_before, near_one = near_one_before)

print(all_params)

{'N2': [8, array([20, 21, 27, 25, 19, 23, 19, 21]), array([20, 20, 26, 25, 19, 23, 19, 20]), 1.66, 1.0, 10.0], '+': [8, array([19, 20, 22, 22, 21, 20, 25, 23]), array([19, 20, 22, 22, 21, 20, 25, 23]), 1.66, 1.0, 10.0], 'hsf-1(sy1198)': [4, array([20, 22, 26, 25]), array([20, 22, 25, 25]), 1.66, 1.0, 10.0], 'hsf-1(sy441)': [4, array([17, 17, 19, 23]), array([17, 17, 18, 23]), 1.66, 1.0, 10.0], 'affl-2(sy975)': [8, array([24, 23, 26, 27, 20, 26, 22, 22]), array([24, 23, 25, 27, 20, 26, 22, 22]), 1.66, 1.0, 10.0], 'affl-1(sy1220) affl-2(sy975)': [4, array([18, 20, 29, 26]), array([17, 20, 29, 26]), 1.66, 1.0, 10.0], 'affl-1(sy1202)': [4, array([23, 21, 22, 26]), array([23, 21, 22, 25]), 1.66, 1.0, 10.0], 'AFFL-2 Del+NLS::GFP': [4, array([17, 30, 19, 21]), array([17, 30, 19, 21]), 1.66, 1.0, 10.0], 'AFFL-2::GFP': [4, array([20, 21, 26, 18]), array([19, 21, 26, 18]), 1.66, 1.0, 10.0], 'AFFL-2 Del::GFP': [4, array([19, 21, 16, 20]), array([19, 21, 16, 20]), 1.66, 1.0, 10.0], 'AFFL-2 FUSLC+N

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [32]:
samples_mut = {}
for genotype in df_mut_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h)
    samples_mut[genotype] = samples_b

    print('')

Genotype:  N2
{'N': array([20, 21, 27, 25, 19, 23, 19, 21]), 'n': array([20, 20, 26, 25, 19, 23, 19, 20]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  +
{'N': array([19, 20, 22, 22, 21, 20, 25, 23]), 'n': array([19, 20, 22, 22, 21, 20, 25, 23]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  hsf-1(sy1198)
{'N': array([20, 22, 26, 25]), 'n': array([20, 22, 25, 25]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 

In [33]:
plot_params(samples_mut)

In [34]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_mut_model['Genotype'].unique()),2))
cent = np.zeros(len(df_mut_model['Genotype'].unique()))
genotypes = df_mut_model['Genotype'].unique()
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_mut[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()

Genotype:  N2
--------------------
Median = 0.960, 95% Credible Region = [0.873, 0.988]

Genotype:  +
--------------------
Median = 0.980, 95% Credible Region = [0.892, 0.996]

Genotype:  hsf-1(sy1198)
--------------------
Median = 0.953, 95% Credible Region = [0.769, 0.991]

Genotype:  hsf-1(sy441)
--------------------
Median = 0.949, 95% Credible Region = [0.767, 0.991]

Genotype:  affl-2(sy975)
--------------------
Median = 0.974, 95% Credible Region = [0.880, 0.994]

Genotype:  affl-1(sy1220) affl-2(sy975)
--------------------
Median = 0.955, 95% Credible Region = [0.761, 0.992]

Genotype:  affl-1(sy1202)
--------------------
Median = 0.956, 95% Credible Region = [0.789, 0.992]



In [35]:
palette_mut = [bokeh.palettes.Colorblind[6][1]]*2 + [bokeh.palettes.Colorblind[6][3]]*2+\
                       bokeh.palettes.Colorblind[7][4:7]
p = plot_with_error_bars(
    cent, cred, genotypes, palette_mut,
)
p.x_range = Range1d(0.0, 1.0)

bokeh.io.show(p)

In [36]:
samples_resc = {}
for genotype in df_resc_model['Genotype'].unique():
    print('Genotype: ', genotype)

    samples_b = model_run_h(all_params[genotype], sm_h, low_noise = False)
    samples_resc[genotype] = samples_b

    print('')

Genotype:  AFFL-2 Del+NLS::GFP
{'N': array([17, 30, 19, 21]), 'n': array([17, 30, 19, 21]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  affl-2(sy975)
{'N': array([24, 23, 26, 27, 20, 26, 22, 22]), 'n': array([24, 23, 25, 27, 20, 26, 22, 22]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 8, 'kappa_sig': 10.0}
n_eff / iter looks reasonable for all parameters.
Rhat looks reasonable for all parameters.
0 of 4000 (0.0%) iterations ended with a divergence.
0 of 4000 (0.0%) iterations saturated the maximum tree depth of 10.
E-BFMI indicated no pathological behavior.
0

Genotype:  AFFL-2::GFP
{'N': array([20, 21, 26, 18]), 'n': array([19, 21, 26, 18]), 'alpha_phi': 1.66, 'beta_phi': 1.0, 'K': 4, 'kappa_sig': 10.0}

In [37]:
plot_params(samples_resc)

In [38]:
# ksgi 10
percs, strPercs = [2.5, 50, 97.5], ['2point5', 'Median', '97point5']
cred = np.zeros((len(df_resc_model['Genotype'].unique()),2))
cent = np.zeros(len(df_resc_model['Genotype'].unique()))
genotypes = df_resc_model['Genotype'].unique()
genotypes = ['N2','+',  'affl-2(sy975)','AFFL-2::GFP',
       'AFFL-2 Del::GFP', 'AFFL-2 Del+NLS::GFP',  'AFFL-2 FUSLC+NLS::GFP',
       'AFFL-2 FUSLC*+NLS::GFP']
for i, genotype in enumerate(genotypes):
    
    # create dictionary with all samples from different conditions 
  
    # Print results
    print('Genotype: ', genotype)
    print('-' * 20)
 
    vals_per = np.percentile(samples_resc[genotype]['phi'], percs)

    print("Median = {:.3f}, 95% Credible Region = [{:.3f}, {:.3f}]"\
              .format(vals_per[1], vals_per[0], vals_per[2]))
    cred[i, :] = np.array([vals_per[0], vals_per[2]])
    cent[i] = vals_per[1]
    print()
    

Genotype:  N2
--------------------
Median = 0.960, 95% Credible Region = [0.883, 0.988]

Genotype:  +
--------------------
Median = 0.979, 95% Credible Region = [0.880, 0.996]

Genotype:  affl-2(sy975)
--------------------
Median = 0.974, 95% Credible Region = [0.878, 0.994]

Genotype:  AFFL-2::GFP
--------------------
Median = 0.951, 95% Credible Region = [0.792, 0.991]

Genotype:  AFFL-2 Del::GFP
--------------------
Median = 0.966, 95% Credible Region = [0.782, 0.995]

Genotype:  AFFL-2 Del+NLS::GFP
--------------------
Median = 0.966, 95% Credible Region = [0.766, 0.995]

Genotype:  AFFL-2 FUSLC+NLS::GFP
--------------------
Median = 0.966, 95% Credible Region = [0.777, 0.995]

Genotype:  AFFL-2 FUSLC*+NLS::GFP
--------------------
Median = 0.952, 95% Credible Region = [0.750, 0.991]



In [39]:
p = plot_with_error_bars(
    cent, cred, genotypes,  palette_resc
)


p.yaxis.major_tick_line_color = None  # turn off x-axis major ticks
p.yaxis.minor_tick_line_color = None  # turn off x-axis minor ticks

p.xaxis.major_tick_line_color = None  # turn off y-axis major ticks
p.xaxis.minor_tick_line_color = None  # turn off y-axis minor ticks
p.x_range = Range1d(0.0, 1.0)
bokeh.io.show(p)

In [40]:
%load_ext watermark
%watermark -v -p numpy,bokeh,bebi103,pandas,altair,jupyterlab

CPython 3.7.6
IPython 7.11.1

numpy 1.17.4
bokeh 1.4.0
bebi103 0.0.40
pandas 0.25.3
altair 4.0.0
jupyterlab 1.2.4
