In this Jupyter notebook, we analyze the plate reader and flow cytometry data associated with Supplementary Figure 10, characterizing the dose response of integrase induction with salicylate on differentiated cell fraction for 1x and 2x differentiation strains. We also analyze the selective plating data from Supplementary Figure 16 and generate the associated plot.

In [1]:
import numpy as np
import pandas as pd
import scipy.optimize
import itertools
import sys
import io
import collections
import warnings
import scipy.interpolate
import csv
import os
import math
import seaborn as sns
from collections import namedtuple
import pkg_resources
from datetime import datetime
from copy import deepcopy as dc
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns
from cycler import cycler
import numpy as np
%matplotlib qt5

sns.set_context("talk", font_scale=1.5, rc={"lines.linewidth": 1.5})
sns.set_style("ticks")
sns.set_style({"xtick.direction": "in","ytick.direction": "in"})

%config InlineBackend.figure_f.ormats=['svg']

mpl.rc('axes', prop_cycle=(cycler('color', ['r', 'k', 'b','g','y','m','c']) ))

mpl.rcParams['pdf.fonttype'] = 42
mpl.rcParams['ps.fonttype'] = 42

tw = 1.5
sns.set_style({"xtick.major.size": 3, "ytick.major.size": 3,
               "xtick.minor.size": 2, "ytick.minor.size": 2,
               'axes.labelsize': 16, 'axes.titlesize': 16,
               'xtick.major.width': tw, 'xtick.minor.width': tw,
               'ytick.major.width': tw, 'ytick.minor.width': tw})

mpl.rc('xtick', labelsize=14) 
mpl.rc('ytick', labelsize=14)
mpl.rc('axes', linewidth=1.5)
mpl.rc('legend', fontsize=14)
mpl.rc('figure', figsize=(8.5,15))

Below we define functions for reading CSVs of time-course plate reader data from a Tecan Infinite 200Pro. Much of this code was adapted from murraylab_tools.biotek to work with Tecan CSVs. Alternatively, the already-tidied dataframes can be read in directly in the cells below.

In [2]:
# Immutable record of the acquisition settings parsed for one plate-reader read.
ReadSet = collections.namedtuple(
    'ReadSet',
    'name number mode excitation emission num_flashes gain',
)
def mt_open(filename, setting_code):
    '''
    Open a text file with latin-1 encoding.
    Arguments:
        --filename: Path of the file to open.
        --setting_code: Mode string as accepted by open() (e.g. 'r', 'w').
                        The legacy universal-newlines flag 'U' (as in 'rU')
                        is accepted and ignored.
    Returns: The opened file object.
    '''
    # The 'U' flag was removed in Python 3.11, where open() raises ValueError
    # for it. Universal newlines is already the default for text mode, so
    # dropping the flag keeps the old behaviour on every Python 3 version.
    mode = setting_code.replace('U', '')
    return io.open(filename, mode, encoding="latin-1")
def read_supplementary_info(input_filename):
    '''
    Read a supplementary (metadata) CSV into nested dictionaries.
    The first row is a header; the first column of every later row is the
    row key (the well name when called from tidy_tecan_data). All cells are
    stripped of surrounding whitespace. Rows that are empty or whose first
    cell is blank are skipped.
    Arguments:
        --input_filename: Name of the supplementary CSV file.
    Returns: A dict mapping each header name (except the first column's) to
                a dict mapping row key -> that row's value in the column.
    Raises: IndexError if a non-blank row has fewer cells than the header.
    '''
    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python 3.11+
    # and universal newlines is already the default for text mode.
    with mt_open(input_filename, 'r') as infile:
        reader = csv.reader(infile)
        header = [cell.strip() for cell in next(reader)]
        column_names = header[1:]
        info = {name: dict() for name in column_names}
        for raw_row in reader:
            cells = [cell.strip() for cell in raw_row]
            if not cells or cells[0] == "":
                continue
            row_key = cells[0]
            # Columns are matched to header names by position.
            for position, name in enumerate(column_names, start=1):
                info[name][row_key] = cells[position]
    return info
def tidy_tecan_data(input_filename, read_names,supplementary_filename = None):
    '''
    Convert the raw output from a Tecan plate reader into tidy data.
    Optionally, also adds columns of metadata specified by a "supplementary
    file", which is a CSV spreadsheet mapping well numbers to metadata.
    Arguments:
        --input_filename: Name of a Tecan output file. Data file should be
                            standard excel output files, saved as a CSV. A
                            file whose extension starts with "xls" is first
                            converted to a CSV with the same base name.
        --read_names: List of names for the reads, in the order in which
                        their "Mode" sections appear in the file. A data
                        block is recognised by a row whose first cell is
                        one of these names.
        --supplementary_filename: Name of a supplementary file. Supplementary
                                    file must be a CSV with a header, where
                                    the first column is the name of the well,
                                    additional columns define additional
                                    metadata, and each row after the header is a
                                    single well's metadata. Defaults to None
                                    (no metadata other than what can be mined
                                    from the data file). Wells missing from
                                    this file are dropped with a warning.
    Returns: None
    Side Effects: Creates a new CSV with the same name as the data file with
                    "_tidy" appended to the end. This new file is in tidy
                    format, with each row representing a single channel read
                    from a single well at a single time.
    '''

    rows = ['A','B','C','D','E','F','G','H']
    cols = ['1','2','3','4','5','6','7','8','9','10','11','12']
    well_list = [row+col for row in rows for col in cols]
    supplementary_data = dict()
    if supplementary_filename:
        supplementary_data = read_supplementary_info(supplementary_filename)
    # Wells that have metadata (every metadata column shares the same keys,
    # so the first column is representative). None means "do not filter".
    known_wells = next(iter(supplementary_data.values()), None)
    filename_base   = input_filename.rsplit('.', 1)[0]
    output_filename = filename_base + "_tidy.csv"

    # If the user gave you an excel file, convert it to a CSV so we can read
    # it properly.
    file_extension = input_filename.rpartition(".")[2]
    if file_extension.startswith("xls"):
        excel_filename = input_filename
        input_filename = excel_filename.rpartition(".")[0] + ".csv"
        pd.read_excel(excel_filename).to_csv(input_filename, index = False)

    # Open data file and tidy output file at once, so that we can stream data
    # directly from one to the other without having to store much.
    # Plain 'r' instead of 'rU': the 'U' flag is rejected by Python 3.11+ and
    # universal newlines is already the default for text mode.
    with mt_open(input_filename, 'r') as infile:
        with mt_open(output_filename, 'w') as outfile:
            # Write a header to the tidy output file.
            read_num = 0
            reader = csv.reader(infile)
            writer = csv.writer(outfile, delimiter = ',')
            title_row = ['channel','mode','excitation','emission','num_flashes','gain','time_sec','time_h','well','measurement']
            for name in supplementary_data.keys():
                title_row.append(name)
            writer.writerow(title_row)

            # Reading flow:
            #   1) Scan for rows whose first cell is "Mode". Each one starts
            #       the settings section of the next read in read_names.
            #   2) Within a settings section, collect wavelength, gain and
            #       flash-count rows until the "Settle Time" row.
            #   3) Once settings for every read have been collected, scan for
            #       data blocks (rows whose first cell is a read name) and
            #       rewrite each well/time measurement as one tidy row.
            read_sets = dict()
            # Start with an "empty" row so the data scan below is well defined
            # even if no row was read here (empty file or empty read_names).
            line = []
            end_reads = False
            while not end_reads:
                if read_num == len(read_names):
                    end_reads = True
                    continue
                try:
                    line = next(reader)
                except StopIteration:
                    break
                # Skip rows in which every cell is empty.
                if not any(len(a) for a in line):
                    continue
                if line[0] == "Mode":
                    mode = line[4]
                    read_name = read_names[read_num]
                    read_num += 1

                    # Collect the settings rows that follow the "Mode" row.
                    for line in reader:
                        if mode=='Absorbance':
                            if line[0] == 'Measurement Wavelength':
                                # Absorbance reads have no emission/gain;
                                # -1 is used as the placeholder.
                                emission = -1
                                excitation = line[4]
                                gain = -1
                                continue
                        if line[0]=='Excitation Wavelength':
                            excitation = line[4]
                            continue
                        if line[0]=='Gain':
                            gain = line[4]
                            continue
                        if line[0]=='Emission Wavelength':
                            emission = line[4]
                            continue
                        if line[0]=='Number of Flashes':
                            num_flashes=line[4]
                        if line[0]=='Settle Time':
                            break
                        if line[0]=='Start Time:':
                            end_reads = True
                    if not read_name in read_sets:
                        read_sets[read_name] = []
                    read_sets[read_name].append(ReadSet(read_name,read_num,mode,excitation,
                                                    emission, num_flashes,gain))
                if end_reads:
                    break

            # Read data blocks
            # Find a data block
            end_data=False
            while not end_data:
                if line is None:
                    # End of file reached before the last read's data block
                    # was seen; stop instead of iterating over None.
                    warnings.warn("Reached the end of %s before finding data "
                                  "blocks for all reads." % input_filename)
                    break
                if not any(len(a) for a in line):
                    line = next(reader, None)
                    continue
                if line[0] in read_names:
                    read_name = line[0]
                    # Settings come from the first read registered under
                    # this name.
                    read_properties    = read_sets[read_name][0]
                    gain = read_properties.gain
                    excitation = read_properties.excitation
                    emission = read_properties.emission
                    num_flashes = read_properties.num_flashes
                    read_num = read_properties.number
                    mode = read_properties.mode

                    line = next(reader) # Skip a line
                    line = next(reader) # Row of time points (seconds)
                    times = line[1:]
                    line = next(reader) # Skip the row after the time points
                    # Data lines: one row per well, one column per time point.
                    for line in reader:
                        if line[0] in well_list:
                            well_name = line[0]
                        else:
                            break
                        if known_wells is not None and \
                          well_name not in known_wells:
                            warnings.warn("No supplementary data for well " + \
                                        "%s; throwing out data for that well."\
                                          % well_name)
                            continue
                        for idx, t in enumerate(times):
                            if len(t)==0:
                                continue
                            t_secs = float(t)
                            t_hrs = t_secs/60/60
                            measurement = float(line[idx+1])
                            row = [read_name, mode, str(excitation),str(emission), num_flashes, gain, 
                                   t_secs, t_hrs, well_name, measurement]
                            for name in supplementary_data.keys():
                                row.append(supplementary_data[name][well_name])
                            try:
                                writer.writerow(row)
                            except TypeError as e:
                                print("Error writing line: " + str(row))
                                raise e

                    # The block belonging to the last read ends the scan.
                    if read_num >= len(read_names):
                        end_data=True
                line = next(reader, None)
def background_subtract_df(df, bg_df, by_well=True):
    '''
    Subtract blank-read background from every measurement in df.
    Arguments:
        --df: Tidy DataFrame with channel, gain, well and measurement columns.
        --bg_df: Tidy DataFrame of blank reads with the same columns.
        --by_well: If True, subtract the mean blank measurement of the same
                    channel, gain and well. If False, subtract the mean blank
                    measurement of the same channel and gain over all wells.
    Returns: A copy of df with background-subtracted measurements; df itself
                is not modified.
    '''
    df_bs = df.copy()
    all_wells = df.well.unique()
    for channel in df.channel.unique():
        gains = df.loc[df.channel == channel, 'gain'].unique()
        for gain in gains:
            sample_rows = (df.channel == channel) & (df.gain == gain)
            blank_rows = (bg_df.channel == channel) & (bg_df.gain == gain)
            if not by_well:
                # One background value per channel/gain pair.
                background = bg_df.loc[blank_rows, 'measurement'].mean()
                df_bs.loc[sample_rows, 'measurement'] = \
                    df.loc[sample_rows, 'measurement'] - background
                continue
            for well in all_wells:
                # One background value per channel/gain/well.
                sample_well = sample_rows & (df.well == well)
                blank_well = blank_rows & (bg_df.well == well)
                background = bg_df.loc[blank_well, 'measurement'].mean()
                df_bs.loc[sample_well, 'measurement'] = \
                    df.loc[sample_well, 'measurement'] - background
    return df_bs
        
def normalize(df, norm_channel = "OD700", norm_channel_gain = -1):
    '''
    Normalize expression measurements by dividing each measurement by the value
    of a reference channel (default OD700).
    Rows are paired by position: after selecting a channel its rows are
    re-indexed from 0, and row i of that channel is divided by row i of the
    reference channel. This assumes every channel lists its wells/times in
    the same order as the reference channel.
    Args:
        df - DataFrame of time traces, of the kind produced by tidy_tecan_data.
        norm_channel - Name of a channel to normalize by. Default "OD700".
        norm_channel_gain - gain the reference channel is expected to have.
                            Default -1. Only used to validate the input.
    Returns:
        A new DataFrame holding the (unchanged) reference-channel rows
        followed by the normalized rows of every other channel. The original
        row labels are kept in an "index" column.
    Raises:
        ValueError if norm_channel is not present in df, or has no rows with
        gain norm_channel_gain.
    '''
    if norm_channel not in df.channel.unique():
        raise ValueError("No data for channel '%s' in dataframe." % \
                         norm_channel)
    if norm_channel_gain not in df[df.channel == norm_channel].gain.unique():
        # %s rather than %d so a non-integer gain still yields this message.
        raise ValueError("channel %s does not use gain %s." % \
                         (norm_channel, norm_channel_gain))

    od_df = df[df.channel == norm_channel].reset_index()
    frames = [od_df.copy()]
    for channel in df.channel.unique():
        if channel == norm_channel:
            continue
        channel_df = df[df.channel == channel].reset_index()
        # Both frames are indexed 0..n-1, so this divides row by row.
        channel_df['measurement'] = channel_df.measurement/od_df.measurement
        frames.append(channel_df)

    # DataFrame.append was removed in pandas 2.0; concat gives the same result.
    return pd.concat(frames, ignore_index=True)

In [3]:
# Tidy every raw Tecan export: three sample plates, each followed by its
# blank read, all annotated with the same well metadata file.
for _raw_export in ('20220604_differentiation_p1.csv',
                    '20220604_differentiation_p1_blankread.csv',
                    '20220604_differentiation_p2.csv',
                    '20220604_differentiation_p2_blankread.csv',
                    '20220604_differentiation_p3.csv',
                    '20220604_differentiation_p3_blankread.csv'):
    tidy_tecan_data(_raw_export,
                    read_names=['OD700', 'mScarlet', 'GFP_1'],
                    supplementary_filename='20220604_metadata.csv')

  after removing the cwd from sys.path.


In [4]:
# Tidy sample reads for the three plates.
df_p1, df_p2, df_p3 = [
    pd.read_csv(_tidy_csv)
    for _tidy_csv in ('20220604_differentiation_p1_tidy.csv',
                      '20220604_differentiation_p2_tidy.csv',
                      '20220604_differentiation_p3_tidy.csv')
]
# Matching blank reads used as background.
df_p1_bg, df_p2_bg, df_p3_bg = [
    pd.read_csv(_tidy_csv)
    for _tidy_csv in ('20220604_differentiation_p1_blankread_tidy.csv',
                      '20220604_differentiation_p2_blankread_tidy.csv',
                      '20220604_differentiation_p3_blankread_tidy.csv')
]

# The plate 1 blank read did not use the correct mScarlet gain, so its
# mScarlet background is replaced by the mean plate 2 mScarlet background
# and relabelled with gain 140.
_p1_mscarlet_blank = df_p1_bg.channel == 'mScarlet'
_p2_mscarlet_mean = df_p2_bg.loc[df_p2_bg.channel == 'mScarlet',
                                 'measurement'].mean()
df_p1_bg.loc[_p1_mscarlet_blank, 'measurement'] = _p2_mscarlet_mean
df_p1_bg.loc[_p1_mscarlet_blank, 'gain'] = 140

# Per-well background subtraction for each plate.
df_p1_bs, df_p2_bs, df_p3_bs = [
    background_subtract_df(_samples, _blanks, True)
    for _samples, _blanks in ((df_p1, df_p1_bg),
                              (df_p2, df_p2_bg),
                              (df_p3, df_p3_bg))
]

In [5]:
# Label every row with the plate it came from.
for _plate_number, _plate_df in enumerate((df_p1_bs, df_p2_bs, df_p3_bs),
                                          start=1):
    _plate_df['plate'] = _plate_number

# The first timepoint is dropped because that read occurred before shaking
# and is not accurate.
df_p1_bs = df_p1_bs.drop(
    index=df_p1_bs.index[df_p1_bs.time_h == df_p1_bs.time_h.min()])
df_p2_bs = df_p2_bs.drop(
    index=df_p2_bs.index[df_p2_bs.time_h == df_p2_bs.time_h.min()])
df_p3_bs = df_p3_bs.drop(
    index=df_p3_bs.index[df_p3_bs.time_h == df_p3_bs.time_h.min()])

# Single table with all three plates.
df_all = pd.concat([df_p1_bs, df_p2_bs, df_p3_bs])

In [6]:
# Save the combined, background-subtracted plate reader data.
df_all.to_csv(path_or_buf='figureS10_platereader_data.csv')

In [6]:
# OD-normalize each plate, then pull out the endpoint (latest time_h) rows of
# the GFP_1 and OD700 channels and compute per-condition means and stds.
def _channel_rows(frame, channel):
    # Rows of `frame` that belong to `channel`.
    return frame.loc[frame.channel == channel, :]


def _endpoint_rows(frame):
    # Rows of `frame` recorded at its largest time_h.
    return frame.loc[frame.time_h == frame.time_h.max(), :]


df_p1_bs_norm = normalize(df_p1_bs)
df_p1_bs_norm_GFP = _channel_rows(df_p1_bs_norm, 'GFP_1')
df_p1_ep_GFP = _endpoint_rows(df_p1_bs_norm_GFP)
df_p1_OD = _channel_rows(df_p1_bs_norm, 'OD700')
df_p1_OD_ep = _endpoint_rows(df_p1_OD)

df_p2_bs_norm = normalize(df_p2_bs)
df_p2_bs_norm_GFP = _channel_rows(df_p2_bs_norm, 'GFP_1')
df_p2_ep_GFP = _endpoint_rows(df_p2_bs_norm_GFP)
df_p2_OD = _channel_rows(df_p2_bs_norm, 'OD700')
df_p2_OD_ep = _endpoint_rows(df_p2_OD)

df_p3_bs_norm = normalize(df_p3_bs)
df_p3_bs_norm_GFP = _channel_rows(df_p3_bs_norm, 'GFP_1')
df_p3_ep_GFP = _endpoint_rows(df_p3_bs_norm_GFP)
df_p3_OD = _channel_rows(df_p3_bs_norm, 'OD700')
df_p3_OD_ep = _endpoint_rows(df_p3_OD)

df_bs_norm = pd.concat([df_p1_bs_norm, df_p2_bs_norm, df_p3_bs_norm])
df_gfp_norm_ep = pd.concat([df_p1_ep_GFP, df_p2_ep_GFP, df_p3_ep_GFP])
df_OD_ep = pd.concat([df_p1_OD_ep, df_p2_OD_ep, df_p3_OD_ep])

# Columns that together identify one experimental condition on one plate.
_condition_columns = ['strain', 'iptg', 'sal', 'chlor', 'plate']

df_OD_ep_avg = df_OD_ep.groupby(
    _condition_columns, as_index=False)['measurement'].mean()
df_OD_ep_avg['std'] = df_OD_ep.groupby(
    _condition_columns)['measurement'].std().values
df_gfp_norm_ep_avg = df_gfp_norm_ep.groupby(
    _condition_columns, as_index=False)['measurement'].mean()
df_gfp_norm_ep_avg['std'] = df_gfp_norm_ep.groupby(
    _condition_columns)['measurement'].std().values

Below we adapt code from the murraylab_tools.biotek package (courtesy of Sam Clamons) for fitting growth rates on all wells. We modify it to use an exponential growth function and to trim data strictly by OD value before fitting.

In [7]:
def summarize_growth(channel_df, fixed_init = None, growth_threshold = None,
                     verbose = False, growth='logistic'):
    '''
    Summarizes the growth characteristics of every well in a dataframe of
    plate reader data by calling summarize_single_well_growth on each well's
    rows and collecting the results.
    Params:
        channel_df -- A DataFrame of plate reader data holding the growth
                        (OD) measurements to fit; it is split on its "well"
                        column.
        fixed_init -- Forwarded to summarize_single_well_growth.
        growth_threshold -- Forwarded to summarize_single_well_growth.
        verbose -- Forwarded to summarize_single_well_growth.
        growth -- Forwarded to summarize_single_well_growth. Default
                    'logistic'.
    Returns: A new dataframe with one row per well, built from the
                dictionaries returned by summarize_single_well_growth, in
                order of each well's first appearance in channel_df.
    '''
    per_well_summaries = []
    for well in channel_df.well.unique():
        single_well_df = channel_df[channel_df.well == well]
        per_well_summaries.append(
            summarize_single_well_growth(single_well_df, growth_threshold,
                                         fixed_init, verbose, growth))
    return pd.DataFrame(per_well_summaries)

def summarize_single_well_growth(well_df, growth_threshold = None,
                                 fixed_init = None, verbose = False,growth='logistic'):
    '''
    Summarizes the growth characteristics of a single well's worth of dataframe,
    returning the results as a dictionary describing a single dataframe line.
    The well's measurements are fit against time_h with logistic_growth or
    exp_growth using scipy.optimize.curve_fit.
    This function is intended as a helper function for summarize_growth.
    Params:
        well_df -- A DataFrame of plate reader data from a single well and a
                    single channel (presumably an OD channel). It is not
                    modified.
        growth_threshold -- If set, only rows whose measurement is strictly
                                below this value are used for the fit.
        fixed_init -- Sets a fixed value for the initial population parameter.
                        If None, this value is optimized with the rest of the
                        parameters.
        verbose -- Iff True, print the name of the well being summarized.
        growth -- 'logistic' (default) or 'exponential'; selects the model.
    Returns: A dictionary containing the fitted parameters ("Rate", "Floor",
                "Init", plus "Cap" for the logistic model), followed by the
                first fitted row's value of every column that is not a
                read-setting, time or measurement column (e.g. the well name
                and any supplementary data).
    Raises: ValueError if growth is not a known model, or if no rows are
                left to fit.
    '''
    # Model function, names of the always-fitted parameters (in the order the
    # model takes them after t), and empirically-reasonable starting guesses.
    if growth == 'logistic':
        model = logistic_growth
        fitted_names = ("Rate", "Cap", "Floor")
        param_guess = (1.3, 1, 0.05)
    elif growth == 'exponential':
        model = exp_growth
        fitted_names = ("Rate", "Floor")
        param_guess = (1.3, 0.001)
    else:
        raise ValueError("Unknown growth model '%s'; expected 'logistic' or "
                         "'exponential'." % growth)

    # Work on a re-indexed copy so the caller's frame is left untouched.
    well_df = well_df.reset_index()
    if growth_threshold is not None:
        well_df = well_df.loc[well_df.measurement < growth_threshold, :]
    if well_df.empty:
        raise ValueError("No measurements available to fit (check "
                         "growth_threshold).")
    # Filtering may have removed the row labelled 0, so the first remaining
    # row is always addressed by position.
    if verbose:
        print("Summarizing from well %s" % well_df.well.iloc[0])

    if fixed_init is None:
        # The initial population is the model's last parameter; start it at 0.
        opt_func = model
        param_guess = param_guess + (0,)
    else:
        def opt_func(t, *params):
            return model(t, *params, fixed_init)

    opt_params = scipy.optimize.curve_fit(opt_func, well_df["time_h"],
                                          well_df["measurement"],
                                          p0 = param_guess,
                                          maxfev = int(1e4))[0]

    # To keep parameters positive, the growth models use the absolute value
    # of whatever parameters they get, so optimization will sometimes return
    # negative parameter values; have to correct these.
    opt_params = np.abs(opt_params)

    return_dict = dict(zip(fitted_names, opt_params))
    if fixed_init is None:
        return_dict["Init"] = opt_params[len(fitted_names)]
    else:
        return_dict["Init"] = fixed_init

    # Add supplemental data: everything except per-read columns.
    per_read_columns = ('channel', 'mode', 'excitation', 'emission',
                        'num_flashes', 'gain', 'time_sec', 'time_h',
                        'measurement')
    for column in well_df.columns.values:
        if column in per_read_columns:
            continue
        return_dict[column] = well_df[column].iloc[0]

    return return_dict

def logistic_growth(t, rate, cap, floor, init):
    '''
    Logistic growth curve sitting on top of a constant noise floor.
    The absolute value of every parameter is used, so the sign of rate, cap,
    floor and init does not matter.
    Params:
        t -- Time.
        rate -- Growth rate parameter.
        cap -- Maximum population size.
        floor -- Noise floor (i.e., OD reading for zero cells).
        init -- Initial population.
    Returns: Model OD reading at the given time, for the given parameters.
    '''
    rate, cap, floor, init = (np.abs(value)
                              for value in (rate, cap, floor, init))
    expansion = np.exp(rate * t)
    population = cap * init * expansion / (cap + init * (expansion - 1))
    return floor + population

def exp_growth(t, rate, floor, init):
    '''
    Model function for exponential growth with a noise floor.
    Params:
        t -- Time.
        rate -- Growth rate parameter.
        floor -- Noise floor (i.e., OD reading for zero cells).
        init -- Initial population.
    The absolute value of rate, floor and init is used, so the sign of those
    three arguments does not affect the result.
    Returns: Model OD reading at the given time, for the given parameters,
        i.e. floor + init * exp(rate * t).
    '''

    return np.abs(floor) + np.abs(init) * np.exp(np.abs(rate) * t)

In [8]:
# Fit exponential growth to the OD700 channel of each plate with
# growth_threshold=0.2 (per the original note: OD700 < 0.2 is used for the
# fit), then summarise the fitted 'Rate' per condition as mean and std.
_growth_condition_cols = ['strain', 'iptg', 'sal', 'chlor']


def _fit_and_average_growth(plate_df):
    """Return (per-well growth fits, per-condition mean/std of 'Rate')."""
    od_rows = plate_df.loc[plate_df.channel == 'OD700', :]
    fits = summarize_growth(od_rows,
                            growth='exponential', growth_threshold=0.2)
    averaged = fits.groupby(_growth_condition_cols,
                            as_index=False)['Rate'].mean()
    # Same grouping keys as the mean above, so the std values are attached
    # positionally in matching group order.
    averaged['std'] = fits.groupby(_growth_condition_cols)['Rate'].std().values
    return fits, averaged


p1_growth_df, p1_growth_df_avg = _fit_and_average_growth(df_p1_bs)
p2_growth_df, p2_growth_df_avg = _fit_and_average_growth(df_p2_bs)
p3_growth_df, p3_growth_df_avg = _fit_and_average_growth(df_p3_bs)

In [10]:
# Tag every per-plate table (raw fits and per-condition averages) with its
# plate number, then stack the three plates into one table of each kind.
_per_plate_fits = (p1_growth_df, p2_growth_df, p3_growth_df)
_per_plate_avgs = (p1_growth_df_avg, p2_growth_df_avg, p3_growth_df_avg)

for plate_number, fits in enumerate(_per_plate_fits, start=1):
    fits['plate'] = plate_number
for plate_number, averages in enumerate(_per_plate_avgs, start=1):
    averages['plate'] = plate_number

growth_df = pd.concat(list(_per_plate_fits))
growth_df_avg = pd.concat(list(_per_plate_avgs))

Now we will read in the flow cytometry data.

In [12]:
# Shared well metadata; its 'well' column is dropped before it is attached
# to each plate's flow table.
metadata = pd.read_csv('20220604_metadata.csv').drop(columns=['well'])

# Per-plate flow cytometry summaries, each joined column-wise (by row index)
# to the metadata.
_flow_files = ('./flow_data/plate1_flow.csv',
               './flow_data/plate2_flow.csv',
               './flow_data/plate3_flow.csv')
p1, p2, p3 = [pd.concat([metadata, pd.read_csv(flow_path)], axis=1)
              for flow_path in _flow_files]
df_flow = pd.concat([p1, p2, p3], axis=0)

# Total GFP+ percentage is the sum of the mScarlet+ and mScarlet- GFP+ gates.
df_flow['GFP+_percent'] = df_flow['mScarlet+/GFP+_percent'] + df_flow['mScarlet-/GFP+_percent']

# Mean and std of GFP+ percentage per condition and plate.
_flow_group_cols = ['strain', 'iptg', 'sal', 'chlor', 'Plate']
df_flow_gfp_avg = df_flow.groupby(_flow_group_cols, as_index=False)['GFP+_percent'].mean()
df_flow_gfp_avg['std'] = df_flow.groupby(_flow_group_cols)['GFP+_percent'].std().values

Below we generate the plots shown in Supplementary Figure 10B-E. Blue is 10uM IPTG and orange is 50uM IPTG; plates 1-3 are plotted side by side, shaded light to dark. Mean +/- SD is plotted for all conditions, with individual replicates shown as small circles.

In [13]:
# 4x4 grid of dose-response panels.
#   Rows: normalized GFP/OD, GFP+ percent, OD700, growth rate.
#   Columns: one per (strain, chlor) pair, at index 2 * strain_index + chlor_index.
fig, ax = plt.subplots(4, 4, sharex=True, sharey='row', figsize=(8.5, 10))

iptgs = [10, 50]
sals = [0, 10, 20, 30]
chlors = ['-', '+']
plates = [1, 2, 3]
# One x offset and one color per (IPTG, plate) series, in the order
# (10, P1), (10, P2), (10, P3), (50, P1), (50, P2), (50, P3).
iptg_offsets = np.linspace(-1, 1, 6) * 2.5
colors = ['#6baed6', '#2171b5', '#08306b', '#fd8d3c', '#d94801', '#7f2704']
strains = ['diff1X', 'diff2X']

# GFP/OD values are plotted relative to the largest per-condition average.
gfp_max = df_gfp_norm_ep_avg.measurement.max()


def _select_condition(frame, strain, chlor, iptg, plate, plate_col):
    """Return the rows of `frame` matching one strain/chlor/IPTG/plate combination."""
    mask = ((frame['strain'] == strain)
            & (frame['chlor'] == chlor)
            & (frame['iptg'] == iptg)
            & (frame[plate_col] == plate))
    return frame.loc[mask]


def _plot_dose_response_row(row_axes, replicate_df, mean_df, value_col,
                            plate_col, scale=None):
    """Draw one row of panels: mean +/- std markers, then individual replicates.

    row_axes     -- the four Axes of one subplot row.
    replicate_df -- table with one row per replicate.
    mean_df      -- per-condition averages of `value_col` with a 'std' column.
    value_col    -- name of the column plotted on the y axis.
    plate_col    -- name of the plate-number column ('plate' or 'Plate').
    scale        -- optional divisor applied to the values and their std.
    """
    series = [(iptg, plate) for iptg in iptgs for plate in plates]
    for i, chlor in enumerate(chlors):
        for j, strain in enumerate(strains):
            panel = row_axes[j * 2 + i]

            # Averages first, so the replicate dots are drawn on top of them.
            for plot_num, (iptg, plate) in enumerate(series):
                rows = _select_condition(mean_df, strain, chlor, iptg,
                                         plate, plate_col)
                values, spread = rows[value_col], rows['std']
                if scale is not None:
                    values, spread = values / scale, spread / scale
                panel.errorbar(rows['sal'] + iptg_offsets[plot_num], values,
                               yerr=spread, capsize=1.5,
                               color=colors[plot_num], linestyle='None',
                               marker='o', ms=3)

            for plot_num, (iptg, plate) in enumerate(series):
                rows = _select_condition(replicate_df, strain, chlor, iptg,
                                         plate, plate_col)
                values = rows[value_col]
                if scale is not None:
                    values = values / scale
                panel.plot(rows['sal'] + iptg_offsets[plot_num], values,
                           '.', color=colors[plot_num], linestyle='None',
                           ms=2, alpha=0.5)


# (replicates, averages, value column, plate column, scale, y label) per row.
# The flow tables name their plate column 'Plate'; the others use 'plate'.
_row_specs = [
    (df_gfp_norm_ep, df_gfp_norm_ep_avg, 'measurement', 'plate', gfp_max, 'GFP/OD (a.u.)'),
    (df_flow, df_flow_gfp_avg, 'GFP+_percent', 'Plate', None, 'GFP+ percent'),
    (df_OD_ep, df_OD_ep_avg, 'measurement', 'plate', None, 'OD700'),
    (growth_df, growth_df_avg, 'Rate', 'plate', None, 'growthrate (1/h)'),
]
for _row_axes, (_reps, _means, _value_col, _plate_col, _scale, _ylabel) in zip(ax, _row_specs):
    _plot_dose_response_row(_row_axes, _reps, _means, _value_col, _plate_col,
                            scale=_scale)
    _row_axes[0].set_ylabel(_ylabel, fontsize=12)

# Column titles go on the top row only.
for i, chlor in enumerate(chlors):
    for j, strain in enumerate(strains):
        ax[0, j * 2 + i].set_title(f'{strain}:{chlor}chlor', fontsize=12)

ax[2, 0].set_yticks([0, 0.5, 1])

ax[3, 0].set_xticks(sals)
for _panel in ax[3]:
    # Raw string: '\m' is not a valid Python escape sequence.
    _panel.set_xlabel(r'Sal conc. ($\mu$M)', fontsize=12)

# An earlier variant of this cell added a legend on ax[0, 0] (title 'IPTG',
# one entry per IPTG/plate series) and saved the figure as
# '20220728_epGFP_fracGFP_epOD_growthrate_wlegend.pdf'.
plt.savefig('20220728_epGFP_fracGFP_epOD_growthrate.pdf', transparent=True)

Below we analyze data from the selective plating experiment performed on glycerol stocks from the endpoint of the experiment described in Figure 2. We load in a summary csv and generate the plot shown in Supplementary Figure 16B.

In [14]:
# Load the plating summary and reshape it to long form: one row per
# (Replicate, Strain, population) with its proportion; missing entries are dropped.
plating_df = pd.read_csv('colony_plating_summmary.csv')
plating_df_tidy = plating_df.melt(id_vars=['Replicate', 'Strain'],
                                  var_name="population",
                                  value_name="proportion")
plating_df_tidy = plating_df_tidy.dropna()
sns.set_palette('colorblind', 4)

fig, ax = plt.subplots()

# Bars and individual points share the same x / y / hue mapping.
_plating_mapping = dict(data=plating_df_tidy, x='Strain', y='proportion',
                        hue='population', dodge=True, ax=ax)
sns.barplot(**_plating_mapping)
sns.stripplot(color='black', **_plating_mapping)
plt.savefig('20220809_plating_plot.pdf')