In this notebook, we will be analyzing the dnaseI assay data as shown in Figure 4 and Supplementary Figure 30

In [2]:
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.integrate
import murraylab_tools.biotek as btek
import itertools
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from cycler import cycler

sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 1.5})
sns.set_style("ticks")
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})


mpl.rc('axes', prop_cycle=(cycler('color', ['r', 'k', 'b','g','y','m','c']) ))

mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42
tw = 1.5
sns.set_style({"xtick.major.size": 3, "ytick.major.size": 3,
               "xtick.minor.size": 2, "ytick.minor.size": 2,
               'axes.labelsize': 16, 'axes.titlesize': 16,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

mpl.rc('xtick', labelsize=14) 
mpl.rc('ytick', labelsize=14)
mpl.rc('axes', linewidth=1.5)
mpl.rc('legend', fontsize=14)
mpl.rc('legend', frameon=False)
mpl.rc('figure', figsize=(8.5,15))
sns.set_palette('colorblind',5)
%matplotlib qt

In [4]:
# generate tidy data frames, tidy data can instead be loaded below without needing the btek package
btek.tidy_biotek_data('./20211114_dnase_assay_lysate.csv',convert_to_uM=False,volume=300,supplementary_filename='20211114_metadata.csv')
btek.tidy_biotek_data('./20211114_dnase_assay_lysate_ctrls.csv',convert_to_uM=False,volume=300,supplementary_filename='20211114_metadata_ctrls.csv');

  "concentrations.") % line[1])
  "concentrations.") % line[1])
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % well_name)
  % we

In [5]:
# load tidy data
dnase_data = pd.read_csv('20211114_dnase_assay_lysate_tidy.csv')
dnase_ctrl_data = pd.read_csv('20211114_dnase_assay_lysate_ctrls_tidy.csv')

In [6]:
# get starting and ending fluorescence, and calculate end/start fluorescence for each control sample
df_sybr_ctrl = dnase_ctrl_data.loc[(dnase_ctrl_data.Channel=='sybr')&(dnase_ctrl_data.Gain==60),:]
t_endpoint = df_sybr_ctrl['Time (hr)'].max()
t_start = df_sybr_ctrl['Time (hr)'].min()
df_sybr_ctrl_endpoint = df_sybr_ctrl.loc[df_sybr_ctrl['Time (hr)']==t_endpoint,:]
df_sybr_ctrl_start = df_sybr_ctrl.loc[df_sybr_ctrl['Time (hr)']==t_start,:]
df_sybr_ctrl_endpoint['end/start'] = df_sybr_ctrl_endpoint.Measurement.values / df_sybr_ctrl_start.Measurement.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys


We will fit use the end/beginning ratio from dnaseI standards to fit a model of the form

\begin{align}
\frac{F_{end}}{F_0} = e^{-2kD^n} - b,
\end{align}

where $b$ describes background loss of fluorescence through photo-bleaching or other non-dnaseI related means of loss of fluorescence, $k$ is the first-order rate constant describing the degradation of DNA by dnaseI, $D$ is the concentration of dnaseI (U/rxn), $n$ is a phenomenological constant which captures the non-linear relationship between dnaseI concentration and observed loss of fluorescence, 2 is the time in hours for which the assay was ran.

In [7]:
def endpoint(p, dnase):
    """
    Theoretical model for spindle length
    """
    k, bg, n = p
    return np.exp(-2*k*(dnase**n)) - bg

def resid(p, dnase, fluor):
    """
    Residuals for spindle length model.
    """
    return fluor - endpoint(p, dnase)

# Extra arguments as a tuple
dnase_amts = np.array([0,0.25,0.5,1,2])
fluor = df_sybr_ctrl_endpoint['end/start'].values
dnase_amts = df_sybr_ctrl_endpoint.dnaseI.values
args = (dnase_amts,fluor)

p0 = np.array([0.8,0.2,0.5])

# Compute the MAP
popt, _ = scipy.optimize.leastsq(resid, p0, args=args)

# Extract the values
k, bg, n = popt

# # Print results
print("""
Model most probable parameters
----------------------------------
k = {0:.3f} µm
n = {1:.3f}
b = {2:.3f}

""".format(k,n,bg))


Model most probable parameters
----------------------------------
k = 0.177 µm
n = 0.440
b = 0.132




We now use this function to determine the calulated dnaseI concentration for our standards using the experimental data to assess the fit. 

In [10]:
fluor_ep = df_sybr_ctrl_endpoint['end/start'].values
df_sybr_ctrl_endpoint['dnase_calc'] = (-1/(2*k)*np.log(fluor_ep+bg))**(1/n)
df_sybr_ctrl_endpoint_avg = df_sybr_ctrl_endpoint.groupby(['dnaseI'],as_index=False)['dnase_calc'].mean()
df_sybr_ctrl_endpoint_avg

    

  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,dnaseI,dnase_calc
0,0.0,0.000351
1,0.25,0.235452
2,0.5,0.50857
3,1.0,1.154415
4,2.0,1.895187


Below we make the plot shown in Supplementary Figure 30B with data from standards plotted as points, and the results of our fitted equation.

In [11]:
# Values of dnase to plot
dnase_plot = np.linspace(0, 5, 100)

# Theoretical curve
fluor_theor = endpoint(popt, dnase_plot)

# Plot results
fig,ax=plt.subplots(figsize=(6,6))
fluor = df_sybr_ctrl_endpoint['end/start'].values
dnase_amts = df_sybr_ctrl_endpoint.dnaseI.values

ax.plot(dnase_amts, fluor, marker='.', 
         linestyle='none', markersize=10, alpha=0.25)

# Plot the result
ax.plot(dnase_plot, fluor_theor, color=sns.color_palette()[1])
ax.set_xlabel('dnaseI (U)',fontsize=14)
ax.set_ylabel('initial/final fluorescence',fontsize=14)
ax.legend(['standards','fit'],fontsize=12)
plt.savefig('20211114_dnase_standards_model_fit.pdf')

Below we plot the timecourse fluorescence data for the standards shown in Supplementary Figure 30A, and the plot above shown in Supplementary Figure 30B.

In [12]:
fig, ax = plt.subplots(1,2,figsize=(9,4))

df = df_sybr_ctrl
dnase_amts = df.dnaseI.unique()
for rep in [1,2,3]:
    for i, dnase in enumerate(dnase_amts):
        time = df.loc[(df.dnaseI==dnase)&(df.rep==rep),'Time (hr)']
        y = df.loc[(df.dnaseI==dnase)&(df.rep==rep),'Measurement']        
        ax[0].plot(time,y,color=sns.color_palette()[i])
ax[0].set_title('JS006',fontsize=14)
ax[0].legend(dnase_amts,title='dnase (U)',fontsize=12,title_fontsize=12)
ax[0].set_xlabel('Time (hr)',fontsize=14)
ax[0].set_ylabel('fluorescence (a.u.)',fontsize=14)

# Values of droplet diameter to plot
dnase_plot = np.linspace(0, 5, 100)

# Theoretical curve
fluor_theor = endpoint(popt, dnase_plot)

# Plot results
fluor = df_sybr_ctrl_endpoint['end/start'].values
dnase_amts = df_sybr_ctrl_endpoint.dnaseI.values
ax[1].plot(dnase_amts, fluor, marker='.', 
         linestyle='none', markersize=10, alpha=0.25,color=sns.color_palette()[0])

# Plot the result
ax[1].plot(dnase_plot, fluor_theor, color=sns.color_palette()[1])
ax[1].set_xlabel('dnaseI (U)',fontsize=14)
ax[1].set_ylabel('initial/final fluorescence',fontsize=14)
ax[1].legend(['standards','fit'],fontsize=12)
plt.savefig('20211114_dnase_standards_model_fit_standards_timecourse.pdf')


With the standards and fitted equation looking good, now we will load and analyze the data from experimental samples.

In [13]:
df_sybr = dnase_data.loc[(dnase_data.Channel=='sybr')&(dnase_data.Gain==60),:]
df_sybr_endpoint = df_sybr.loc[df_sybr['Time (hr)']==t_endpoint,:]
df_sybr_start = df_sybr.loc[df_sybr['Time (hr)']==t_start,:]
df_sybr_endpoint['end/start'] = df_sybr_endpoint.Measurement.values/df_sybr_start.Measurement.values
fluor_ep = df_sybr_endpoint['end/start'].values
df_sybr_endpoint['dnase_calc'] = (-1/(2*k)*np.log(fluor_ep+bg))**(1/n)
df_sybr_endpoint['dnaseI U/L '] = df_sybr_endpoint['dnase_calc']*df_sybr_endpoint['dilution']/10*df_sybr_endpoint['lysate_vol']*1000/25
df_sybr_endpoint['dnaseI U/gCW'] = df_sybr_endpoint['dnase_calc']*df_sybr_endpoint['dilution']/10*df_sybr_endpoint['lysate_vol']/df_sybr_endpoint['weight']*1000
df_sybr_endpoint_avg = df_sybr_endpoint.groupby(['strain','dnaseI','iptg','sal','col','growth','weight','lysate_vol','dilution'],as_index=False)['dnase_calc'].mean()
df_sybr_endpoint_avg['dnaseI U/L '] = df_sybr_endpoint_avg['dnase_calc']*df_sybr_endpoint_avg['dilution']/10*df_sybr_endpoint_avg['lysate_vol']*1000/25
df_sybr_endpoint_avg['dnaseI U/gCW'] = df_sybr_endpoint_avg['dnase_calc']*df_sybr_endpoint_avg['dilution']/10*df_sybr_endpoint_avg['lysate_vol']/df_sybr_endpoint_avg['weight']*1000
df_sybr_endpoint_avg = df_sybr_endpoint_avg.drop(index=df_sybr_endpoint_avg.loc[(df_sybr_endpoint_avg.strain=='JS006')&\
                                                                                (df_sybr_endpoint_avg.dnaseI>0),:].index) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/

Below we plot the experimental data shown in Supplementary Figure 30C

In [14]:
fig, ax = plt.subplots(1,3,sharex=True, sharey=True,figsize=(15,4))
df = df_sybr
for i, col in enumerate([1,2,3]):
    ax[i].set_xlabel('Time (hr)',fontsize=14)
    for rep in [1,2,3]:
        time = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==0)&\
                      (df.sal==0)&\
                      (df.growth==1),'Time (hr)']
        y = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==0)&\
                      (df.sal==0)&\
                      (df.growth==1),'Measurement']
        ax[i].plot(time,y,'-',color=sns.color_palette()[0])
        time = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==10)&\
                      (df.sal==20)&\
                      (df.growth==1),'Time (hr)']
        y = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==10)&\
                      (df.sal==20)&\
                      (df.growth==1),'Measurement']
        ax[i].plot(time,y,'--',color=sns.color_palette()[0])
        time = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==10)&\
                      (df.sal==20)&\
                      (df.growth==2),'Time (hr)']
        y = df.loc[(df.strain=='split pir 2x bxb1')&\
                      (df.col==col)&\
                      (df.rep==rep)&\
                      (df.iptg==10)&\
                      (df.sal==20)&\
                      (df.growth==2),'Measurement']
        ax[i].plot(time,y,'--',color=sns.color_palette()[1])
    ax[i].legend(['-iptg/sal (g1)','+iptg/sal (g1)','+iptg/sal (g2)'],fontsize=12,loc='best')
    ax[i].set_title(f'split pir diff colony #{col}',fontsize=14)

ax[0].set_ylabel('fluorescence (a.u.)',fontsize=14)
# plt.savefig('20211114_dnase_assay_lysate_undiluted_timecourse.pdf')

Text(0, 0.5, 'fluorescence (a.u.)')

To more accurately assess the dnaseI activity in the lysates, we did this assay again on 1:50 diluted samples from the first growth in addition to the original 1:10 dilution. We import this data and anayze it below.

In [15]:
# btek.tidy_biotek_data('./20211117_dnase_assay_lysate_dilutions.csv',convert_to_uM=False,volume=300)
btek.tidy_biotek_data('./20211117_dnase_assay_lysate_dilutions.csv',convert_to_uM=False,volume=300,supplementary_filename='20211117_metadata_dilutions.csv')
dnase_dil_data = pd.read_csv('20211117_dnase_assay_lysate_dilutions_tidy.csv')

  "concentrations.") % line[1])


In [16]:
# load data and determine dnaseI concentration using the equation fitted to standards
df_sybr_dil = dnase_dil_data.loc[(dnase_dil_data.Channel=='sybr')&(dnase_dil_data.Gain==60),:]
df_sybr_dil_endpoint = df_sybr_dil.loc[df_sybr_dil['Time (hr)']==t_endpoint,:]
df_sybr_dil_start = df_sybr_dil.loc[df_sybr_dil['Time (hr)']==t_start,:]
df_sybr_dil_endpoint['end/start'] = df_sybr_dil_endpoint.Measurement.values/df_sybr_dil_start.Measurement.values
fluor_ep = df_sybr_dil_endpoint['end/start'].values
df_sybr_dil_endpoint['dnase_calc'] = (-1/(2*k)*np.log(fluor_ep+bg))**(1/n)
df_sybr_dil_endpoint['dnaseI U/L '] = df_sybr_dil_endpoint['dnase_calc']*df_sybr_dil_endpoint['dilution']/10*df_sybr_dil_endpoint['lysate_vol']*1000/25
df_sybr_dil_endpoint['dnaseI U/gCW'] = df_sybr_dil_endpoint['dnase_calc']*df_sybr_dil_endpoint['dilution']/10*df_sybr_dil_endpoint['lysate_vol']/df_sybr_dil_endpoint['weight']*1000
df_sybr_dil_endpoint_avg = df_sybr_dil_endpoint.groupby(['strain','dnaseI','iptg','sal','col','growth','weight','lysate_vol','dilution'],as_index=False)['Measurement'].mean()
df_sybr_dil_start_avg = df_sybr_dil_start.groupby(['strain','dnaseI','iptg','sal','col','growth','weight','lysate_vol','dilution'],as_index=False)['Measurement'].mean()
fluor_ep = df_sybr_dil_endpoint_avg.Measurement.values
fluor_0 = df_sybr_dil_start_avg.Measurement.values
df_sybr_dil_endpoint_avg['dnase_calc'] = (-1/(2*k)*np.log((fluor_ep+bg)/fluor_0))**(1/n)
df_sybr_dil_endpoint_avg['dnaseI U/L '] = df_sybr_dil_endpoint_avg['dnase_calc']*df_sybr_dil_endpoint_avg['dilution']/10*df_sybr_dil_endpoint_avg['lysate_vol']*1000/25
df_sybr_dil_endpoint_avg['dnaseI U/gCW'] = df_sybr_dil_endpoint_avg['dnase_calc']*df_sybr_dil_endpoint_avg['dilution']/10*df_sybr_dil_endpoint_avg['lysate_vol']/df_sybr_dil_endpoint_avg['weight']*1000

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexin

Make plot as shown in Supplementary Figure 30D

In [17]:
fig, ax = plt.subplots(1,3,sharex=True, sharey=True,figsize=(15,4))
df = df_sybr_dil
for rep in [1,2,3]:
    for i, col in enumerate([1,2,3]):
        ax[i].set_title(f'split pir diff colony #{col}',fontsize=14)
        ax[i].set_xlabel('Time (hr)',fontsize=14)
        for j, dilution in enumerate([10,50]):
            time = df.loc[(df.strain=='split pir 2x bxb1')&\
                          (df.col==col)&\
                          (df.rep==rep)&\
                          (df.iptg==10)&\
                          (df.sal==20)&\
                          (df.growth==1)&\
                          (df.dilution==dilution),'Time (hr)']
            y = df.loc[(df.strain=='split pir 2x bxb1')&\
                          (df.col==col)&\
                          (df.rep==rep)&\
                          (df.iptg==10)&\
                          (df.sal==20)&\
                          (df.growth==1)&\
                          (df.dilution==dilution),'Measurement']
            ax[i].plot(time,y,'-',color=sns.color_palette()[j],alpha=0.6)
           
        # plot JS006 negative control on each plot
        for j, dilution in enumerate([10,50]):
            time = df.loc[(df.strain=='JS006')&\
                          (df.rep==rep)&\
                          (df.dilution==dilution),'Time (hr)']
            y = df.loc[(df.strain=='JS006')&\
                       (df.rep==rep)&\
                       (df.dilution==dilution),'Measurement']
            ax[i].plot(time,y,'--',color=sns.color_palette()[j],alpha=0.6)
ax[0].legend(['1:10','1:50','NC 1:10','NC 1:50'],fontsize=12,loc='best',title='dilution',title_fontsize=12)
ax[1].legend(['1:10','1:50','NC 1:10','NC 1:50'],fontsize=12,loc='best',title='dilution',title_fontsize=12)
ax[2].legend(['1:10','1:50','NC 1:10','NC 1:50'],fontsize=12,loc='best',title='dilution',title_fontsize=12)
        
        
plt.savefig('20211114_dnase_assay_lysate_diluted_timecourse.pdf')

Now we make a data frame using the 1:50 dilution data for the induced samples, and the 1:10 dilution data from the uninduced and JS006 negative control samples.

In [18]:
# create data frame with experimental data, using the 1:50 dilution for induced samples, 
# and the original 1:10 dilution for the uninduced samples and Js006 negative control
df_sybr_dil_endpoint_plotting = df_sybr_dil_endpoint.loc[(df_sybr_dil_endpoint.dilution==50)&\
                                                                 (df_sybr_dil_endpoint.strain!='JS006'),:]
df_sybr_endpoint_plotting = df_sybr_endpoint.loc[(df_sybr_endpoint.growth==1)&\
                                                 (df_sybr_endpoint.dnaseI==0)&\
                                                 (df_sybr_endpoint.iptg==0),:]
df_sybr_endpoint_plotting_2 = df_sybr_endpoint.loc[(df_sybr_endpoint.growth==2),:]
df_sybr_plotting = pd.concat([df_sybr_endpoint_plotting,
                              df_sybr_dil_endpoint_plotting])
df_sybr_all = pd.concat([df_sybr_endpoint_plotting,
                         df_sybr_dil_endpoint_plotting,
                        df_sybr_endpoint_plotting_2])

In [19]:
# calculate average dnaseI activity, and use volume of culture, cell weight weight, 
# and volume lysate to put this in units of dnaseI U/L and U/gCW
df_sybr_all_avg = df_sybr_all.groupby(['strain', 'col', 'iptg', 'sal',
       'dnaseI', 'growth', 'weight', 'lysate_vol', 'dilution'],as_index=False)['dnase_calc'].mean()
df_sybr_all_avg['dnaseI U/L '] = df_sybr_all_avg['dnase_calc']*df_sybr_all_avg['dilution']/10*df_sybr_all_avg['lysate_vol']*1000/25
df_sybr_all_avg['dnaseI U/gCW'] = df_sybr_all_avg['dnase_calc']*df_sybr_all_avg['dilution']/10*df_sybr_all_avg['lysate_vol']/df_sybr_all_avg['weight']*1000

In [20]:
df_sybr_all_avg.to_csv('20211114_dnase_assay_all_calulated_avg.csv')
df_sybr_all.to_csv('20211114_dnase_assay_all_calulated.csv')

In [21]:
# for ease of plotting JS006 will be labeld as colony 4. The final plot as shown in Figure 4 is finished in Affinity Designer
df_sybr_plotting.loc[df_sybr_plotting.strain=='JS006','col'] = 4

Below will make the plot shown in Figure 4B, note the colors of colony 4 which is the JS006 lysate negative control are changed in the final figure.

In [22]:
sns.set_palette('colorblind',4)
fig, ax = plt.subplots(figsize=(6,5))  
strains = np.array(['1x diff 2x bxb1','split pir 2x bxb1'])

sns.stripplot(x="col", y='dnaseI U/gCW', hue="sal", data=df_sybr_plotting,dodge=True)
sns.pointplot(x="col", y="dnaseI U/gCW", hue="sal", data=df_sybr_plotting, dodge=0.8 -0.8/2,
              join=False, palette="dark",
              markers="d", scale=1, ci=None)
ax.set_yscale('log')
ax.set_ylim(5,)
plt.savefig('20220810_dnase_plot_fig4.pdf')