# Analysis of the growth rates measured on 2019-08-14

(c) 2019 Manuel Razo. This work is licensed under a [Creative Commons Attribution License CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/). All code contained herein is licensed under an [MIT license](https://opensource.org/licenses/MIT)

---

In [1]:
import os
import itertools

# Our numerical workhorses
import numpy as np
import scipy as sp
import scipy.signal
import pandas as pd

# Import matplotlib stuff for plotting
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib as mpl

# Seaborn, useful for graphics
import seaborn as sns

# Import Interactive plot libraries
import bokeh.plotting
import bokeh.layouts
from bokeh.themes import Theme
import holoviews as hv

# Import the project utils
import evo_mwc 

# Render inline matplotlib figures in the high-resolution 'retina' format
%config InlineBackend.figure_format = 'retina'

# Send Bokeh output to the notebook and select Bokeh as the HoloViews
# plotting extension
bokeh.io.output_notebook()
hv.extension('bokeh')

  return inst.__call__(*args,**params)


In [2]:
# Apply the PBoC style to matplotlib first, then raise the figure dpi on
# top of it
evo_mwc.viz.pboc_style_mpl()
mpl.rcParams.update({'figure.dpi': 110})

# Wrap the PBoC Bokeh style in a Theme and assign it to the HoloViews
# Bokeh renderer
theme = Theme(json=evo_mwc.viz.pboc_style_bokeh())
hv.renderer('bokeh').theme = theme

## Purpose
The purpose of this experiment was to test the functionality of the recently
cloned plasmids:
- pZS2*5-O2+11-sacB-tetA-gfp
- pZS2*5-O2+11-galK-tetA-gfp

These constructs are still on plasmids, not integrated into the genome. For
now, we just want to make sure that we obtain the expected qualitative
behavior for each of the selection cassettes.

## Strains

| Plasmid | Genotype | Host Strain | Shorthand |
| :------ | :------- | ----------: | --------: |
| `none`| `∆lacI` |  HG105 |`HG105` |
| `none`| `∆lacI, ∆galK` |  HG105 |`∆galK` |
| `pZS2*5-O2+11-sacB-tetA-gfp`| `∆lacI` |  HG105 |`O2-sacB-tetA` |
| `pZS2*5-O2+11-galK-tetA-gfp`| `∆lacI` |  HG105 |`O2-galK-tetA` |

Let's begin by importing the growth rates as inferred with the Gaussian process method. We will start with the per-well analysis.

In [50]:
# Read the per-well table of inferred growth rates
df_gp = pd.read_csv('./output/20190814_r1_gp_per_well.csv', index_col=False)
# Split every well label into its first character (row) and the
# remaining characters (column)
df_gp['row'] = [well[0] for well in df_gp['well']]
df_gp['col'] = [well[1:] for well in df_gp['well']]

def sort_by_time(df, time='time_min'):
    '''
    Function to order the rows of a DataFrame by its time column.

    Parameters
    ----------
    df : Pandas DataFrame
        Data to be sorted, e.g. the measurements of a single well.
    time : str. Default 'time_min'
        Name of the column to sort by.

    Returns
    -------
    Pandas DataFrame
        New DataFrame with the rows in ascending order of `time`. The
        input is left untouched and the original index labels are kept.
    '''
    ordered = df.sort_values(by=time)
    return ordered

# Sort each well by time, then drop the outer index level produced by
# the groupby-apply to get rid of the resulting multiindex
df_gp = (
    df_gp
    .groupby('well')
    .apply(sort_by_time)
    .droplevel(level=0)
)

df_gp.head()

Unnamed: 0,OD600,OD_sub,blank_val,challenge,date,doubling_time,doubling_time_std,growth_rate,growth_rate_std,logOD_fit,...,neg_select,plasmid,pos_select,run_number,strain,temp_C,time_min,well,row,col
0,395,132.333333,262.666667,,20190814,176.378973,36.602667,0.00393,0.000816,6.309805,...,,,,1,HG105,37.1,1.783333,A01,A,1
1,653,225.833333,427.166667,,20190814,198.629183,40.679284,0.00349,0.000715,6.328346,...,,,,1,HG105,37.0,6.783333,A01,A,1
2,639,222.166667,416.833333,,20190814,225.871653,45.828138,0.003069,0.000623,6.344734,...,,,,1,HG105,37.0,11.783333,A01,A,1
3,636,211.666667,424.333333,,20190814,259.798076,52.513278,0.002668,0.000539,6.359067,...,,,,1,HG105,37.0,16.783333,A01,A,1
4,591,170.0,421.0,,20190814,302.929428,61.502096,0.002288,0.000465,6.371449,...,,,,1,HG105,37.0,21.783333,A01,A,1


Let's quickly take a look at all the inferred growth rates from each well. This is just a rough first look at the kind of data we are working with.

In [38]:
# Set the HoloViews output size, then draw the growth rate vs. time of
# every well as a grid indexed by plate column and row
hv.output(size=50)
hv.Curve(
    df_gp,
    kdims=[
        ('time_min', 'time (min)'),
        ('growth_rate', 'growth rate (min\u207B\u00B9)'),
    ],
).groupby(['col', 'row']).grid()

These measurements are really noisy, especially at the beginning of the growth curves. Let's take a look at the individual trajectories.

In [40]:
# OD600 vs. time, grouped by well (dropdown menu to pick the well)
hv_OD = hv.Curve(
    df_gp,
    kdims=[('time_min', 'time (min)'), ('OD600', 'OD600')],
    vdims=['well'],
).groupby('well')

# Growth rate vs. time, grouped by well in the same way
hv_gr = hv.Curve(
    df_gp,
    kdims=[
        ('time_min', 'time (min)'),
        ('growth_rate', 'growth rate (min\u207B\u00B9)'),
    ],
    vdims=['well'],
).groupby('well')

# Arrange both plots in a single column, OD on top of growth rate
hv_layout = hv.Layout(
    hv_OD.opts(width=800, height=400, xlabel='')
    + hv_gr.opts(width=800, height=400)
).cols(1)
hv_layout

We can obviously find the maximum growth rate very easily just by finding the maximum value among the list, but for a more robust method we'll use `scipy`'s `signal` module. We'll do that very easily for each of the wells using the `apply` function from pandas.

In [56]:
# Start with every time point flagged as "not a peak" in both
# boolean columns
df_gp['peak_bool'] = False
df_gp['max_peak_bool'] = False

def find_peak(df, time_range=(200, 1200)):
    '''
    Function to flag the growth-rate peaks of a time series within a
    time window.

    Parameters
    ----------
    df : Pandas DataFrame
        Time series to analyze. Must contain the columns `time_min` and
        `growth_rate`. It is not modified.
    time_range : sequence of two floats. Default (200, 1200)
        Lower and upper bounds (both inclusive) on `time_min`. Rows
        outside of this window are dropped from the output.

    Returns
    -------
    df : Pandas DataFrame
        Copy of the rows of the input that fall within `time_range`, with
        two boolean columns computed from scratch:
        peak_bool : True at the local maxima of `growth_rate` as found
            by `scipy.signal.find_peaks`.
        max_peak_bool : True at the row(s) where `growth_rate` equals
            its maximum value within the window.
    '''
    # Adjust time to consider only range set by user. The explicit copy
    # lets us write the flag columns directly; the chained
    # `df[col].iloc[idx] = True` pattern on a filtered slice raises
    # SettingWithCopyWarning and is not guaranteed to update the data.
    in_window = ((df.time_min >= time_range[0]) &
                 (df.time_min <= time_range[1]))
    df = df[in_window].copy()

    # Flag every local maximum of the growth rate
    peak_idx = scipy.signal.find_peaks(df.growth_rate.to_numpy())[0]
    peak_bool = np.zeros(len(df), dtype=bool)
    peak_bool[peak_idx] = True
    df['peak_bool'] = peak_bool

    # Flag the position(s) of the largest growth rate in the window.
    # Note this is the maximum value, which need not be a local maximum
    # (e.g. when it sits on the edge of the window).
    max_peak = df.growth_rate.max()
    df['max_peak_bool'] = (df.growth_rate == max_peak).to_numpy()
    return df

# Group by well and find the peaks of each well, then drop the outer
# index level produced by the groupby-apply
df_gp = (
    df_gp
    .groupby('well')
    .apply(find_peak)
    .droplevel(level=0)
)

Excellent. Now we have a boolean index that tells us where the peaks happen. Let's plot the growth rate curves again, this time with the maximum growth rate of each well marked on the curve.

In [57]:
# Define colors for plot: colors[0] is used for the curves and
# colors[1] for the markers
colors =  bokeh.palettes.Colorblind[3]

# Generate OD600 curves per well with dropdown menu
hv_OD = hv.Curve(
    data=df_gp,
    kdims=[('time_min', 'time (min)',), 
           ('OD600', 'OD600')],
    vdims=['well'],
).groupby('well')
# Set options for plot. The return value is not used; the rest of the
# cell relies on the options being applied to hv_OD itself.
hv_OD.opts(
    color=colors[0],
    width=800,
    height=400,
    xlabel='',
)

# Define plot for growth rate curve, also grouped by well
hv_gr = hv.Curve(
    data=df_gp,
    kdims=[('time_min', 'time (min)',), 
           ('growth_rate', 'growth rate (min\u207B\u00B9)')],
    vdims=['well'],
).groupby('well')
# Set plot color and size
hv_gr.opts(
    color=colors[0],
    width=800,
    height=400,
)

# Define scatter for peaks. Only the rows flagged in max_peak_bool are
# shown, i.e. the maximum growth rate of each well rather than every
# local peak.
hv_peaks = hv.Scatter(
    data=df_gp[df_gp.max_peak_bool],
    kdims=[('time_min', 'time (min)',), 
           ('growth_rate', 'growth rate (min\u207B\u00B9)')],
    vdims=['well'],
).groupby('well')
# Set options for plot
hv_peaks.opts(
    color=colors[1],
    size=8,
)

# Single-column layout: OD curve on top, growth rate curve overlaid
# with its maximum below
hv.Layout(hv_OD + (hv_gr * hv_peaks)).cols(1)

Another way to display these data is by grouping by the strain and the selection they grew in rather than by single well to account for technical replicates.

In [58]:
# Define colors for plot: colors[0] is used for the data points and
# colors[1] for the maxima
colors =  bokeh.palettes.Colorblind[3]

# Generate OD600 scatter grouped by challenge and plasmid (instead of
# by well), so all wells sharing both values are shown together
hv_OD = hv.Scatter(
    data=df_gp,
    kdims=[('time_min', 'time (min)',), 
           ('OD600', 'OD600')],
    vdims=['challenge', 'plasmid'],
).groupby(['challenge', 'plasmid'])
# Set options for plot. The return value is not used; the rest of the
# cell relies on the options being applied to hv_OD itself.
hv_OD.opts(
    color=colors[0],
    width=800,
    height=400,
    xlabel='',
)

# Growth rate scatter with the same grouping
hv_gr = hv.Scatter(
    data=df_gp,
    kdims=[('time_min', 'time (min)',), 
           ('growth_rate', 'growth rate (min\u207B\u00B9)')],
    vdims=['challenge', 'plasmid'],
).groupby(['challenge', 'plasmid'])

# Define scatter for peaks. Only the rows flagged in max_peak_bool are
# shown, i.e. the maximum growth rate found for each well.
hv_peaks = hv.Scatter(
    data=df_gp[df_gp.max_peak_bool],
    kdims=[('time_min', 'time (min)',), 
           ('growth_rate', 'growth rate (min\u207B\u00B9)')],
    vdims=['challenge', 'plasmid'],
).groupby(['challenge', 'plasmid'])
# Set options for the maxima markers
hv_peaks.opts(
    color=colors[1],
    size=8,
)
# Set options for the growth rate scatter
hv_gr.opts(
    color=colors[0],
    width=800,
    height=400,
)

# Single-column layout: OD scatter on top, growth rate scatter overlaid
# with the maxima below
hv.Layout(hv_OD + (hv_gr * hv_peaks)).cols(1)