#### This notebook generates the plots for Figure 8 and Supplementary Figure 8.1. There is more data here than we discuss in the paper, specifically from experiments where we supplemented the chambers with acetate. We weren't really able to interpret these results within the scope of this paper, but you're welcome to see them discussed more in my dissertation: https://www.proquest.com/docview/2838439708

In [41]:
import numpy as np
import pandas as pd
import bokeh.io
import bokeh.layouts
import bokeh.palettes
import bokeh.plotting
import bokeh.application
import bokeh.application.handlers
from bokeh.models import Legend
from bokeh.models import Range1d
import re
import scipy.stats as st
import glob
import os
import sys
import numba



bokeh.io.output_notebook()

palette = bokeh.palettes.Colorblind6

#### Borrowing functions and strategy from Jusin Bois (https://bebi103a.github.io/lessons/17/hacker_nhst.html?highlight=bootstrap) and settling on the algorithm described by Efron and Tibshirani from Efron B, Tibshirani R. An introduction to the bootstrap. Nachdr. Boca Raton, Fla.: Chapman & Hall; 1998. 436 p. (Monographs on statistics and applied probability) (Algorithm 16.2).

In [2]:
def shift_means(x, y):
    """Shift distributions to have the same mean (null hypothesis
    is that means are equal)"""
    
    total_mean = np.mean(np.concatenate((x, y)))
    x_shift = x - np.mean(x) + total_mean
    y_shift = y - np.mean(y) + total_mean
    
    return x_shift, y_shift

@numba.njit
def draw_bs_sample(data):
    """Draw a bootstrap sample from a 1D data set."""
    return np.random.choice(data, size=len(data))

@numba.njit
def draw_bs_reps_diff_mean(x, y, size=1):
    """
    Generate bootstrap replicates with difference of means
    as the test statistic.
    """
    out = np.empty(size)
    for i in range(size):
        out[i] = np.mean(draw_bs_sample(x)) - np.mean(draw_bs_sample(y))

    return out

def bootstrap_hypothesis_test_mean(set1, set2, num_permutations=1000000):
    
    # pdb.set_trace()
    if (np.isnan(set1)).all() or (np.isnan(set2)).all():
        return None
    set1 = set1[~np.isnan(set1)]
    set2 = set2[~np.isnan(set2)]
    
    if (len(set1) < 3) or (len(set2) < 3):
        return None
    
    diff_mean = np.mean(set1) - np.mean(set2)
    
    shift_set1, shift_set2 = shift_means(set1, set2)
    
    bs_reps = draw_bs_reps_diff_mean(shift_set1, shift_set2, size=num_permutations)
    p_val = np.sum(np.abs(bs_reps) >= np.abs(diff_mean)) / len(bs_reps)
    return p_val


@numba.jit
def t_stat(set1, set2, size=1):
    
    out = np.empty(size)
    for i in range(size):
        t = ( np.mean(draw_bs_sample(set1)) - np.mean(draw_bs_sample(set2)) ) / np.sqrt( np.var(set1)/len(set1) + np.var(set2)/len(set2) )
        out[i] = np.abs(t)

    return out

    
def bootstrap_efron_tibshirani(set1, set2, num_permutations=1000000):
    """
    
    Efron B, Tibshirani R. An introduction to the bootstrap. Nachdr. Boca Raton, Fla.: Chapman & Hall; 1998. 
    436 p. (Monographs on statistics and applied probability). 

    
    https://en.wikipedia.org/wiki/Bootstrapping_(statistics)#Bootstrap_hypothesis_testing
    """
    
    if (np.isnan(set1)).all() or (np.isnan(set2)).all():
        return None
    set1 = set1[~np.isnan(set1)]
    set2 = set2[~np.isnan(set2)]
    
    if (len(set1) < 3) or (len(set2) < 3):
        return None
    
    t = ( np.mean(set1) - np.mean(set2) ) / np.sqrt( np.var(set1)/len(set1) + np.var(set2)/len(set2) )
    # print(t)
    
    shift_set1, shift_set2 = shift_means(set1, set2)
    # print(shift_set1)
    # print(shift_set2)
    
    t_boots = t_stat(shift_set1, shift_set2, size=num_permutations)
    # print(t_boots)
    
    p_val = np.sum(np.abs(t_boots) >= np.abs(t)) / len(t_boots)
    
    return p_val
    
    

##### Write a function to get data files based on date and desired data

In [3]:
def get_data(rel_path, data_type, date, exp_date):
    """
    Function to retrieve desired data: CFUs, electrolysis, ATP, or nitrite.
    This function requires that the data file be in .csv format.
    
    Parameters:
    -----------
    rel_path : str
        The relative path to follow to find the data
    data_type : str
        The piece of data that you want to retrieve. Possible inputs are
        'CFUs', 'EChem', 'ATP', and 'Nitrite'
    date : str
        The date on which the experiment began in the format YYYY_MM_DD.
        Used to find files
    exp_date : str
        The date that I want to use to label the data. In the case of the
        potentiostat data, this is different from the date on which the 
        potentiostat was started because that is a day before inoculation.
        I.e., if the all the CFUs, ATP measurements, etc. begin on 2019_10_20,
        the potentiostat began on 2019_10_19, but I want to label these data
        with the same date. Format YYYY_MM_DD.
        
    Returns:
    --------
    df : pandas DataFrame
        The dataframe containing the desired data
    """

    allowed_data_types = ["CFUs", "EChem", "ATP", "Nitrite"]
    assert (
        data_type in allowed_data_types
    ), f"Your data type is not in {allowed_data_types}"

    # The potentiostat data comes out as .txt files with tab separated columns
    if data_type == "EChem":
        path = os.path.join(rel_path, data_type, f"{date}*.txt")
    else:
        path = os.path.join(rel_path, data_type, f"{date}*.csv")

    files = glob.glob(path)

    # The potentiostat generates up to four data files, one for each lead, but
    # it is critical to check that a single data file is defined by the input
    # for the other measurements because I have had to requantify some data files,
    # and so there are some files that share dates, for example.

    if data_type != "EChem":
        assert (
            len(files) == 1
        ), f"Your input did not define a unique data file for ATP, NO2, or CFU results: {files}"
        
        df = pd.read_csv(files[0])
        
        if data_type == 'CFUs':
            df['chamber'] = [f"Chamber {c}" for c in df["chamber"].values]
            df['exp'] = exp_date
            
            return df
            
        elif data_type == 'ATP':
            df['exp'] = exp_date
            calib_wells = [f'A{i}' for i in range(1,9)]
            calib_wells.insert(0, 'Kinetic read')
            
            column_dict = {'A1': 5000, 
                           'A2': 2500,
                           'A3': 500,
                           'A4': 250,
                           'A5': 50,
                           'A6': 25,
                           'A7': 5,
                           'A8': 0}
            
            atp_calib_dict = get_calib(df, data_type, calib_wells, column_dict)
            measured_atp_df = get_measured_df(df, 
                                              atp_calib_dict['value_name'], 
                                              atp_calib_dict['calib_wells'])
            
            return atp_calib_dict, measured_atp_df
        
        elif data_type == 'Nitrite':
            df['exp'] = exp_date
            
            if exp_date == '2019_08_28':
                calib_wells = [f'B{i}' for i in range(1,7)]
                calib_wells.insert(0, 'Kinetic read')
                
                column_dict = {'B1': 60,
                               'B2': 45,
                               'B3': 30,
                               'B4': 20,
                               'B5': 10,
                               'B6': 0}
            
            else:
                calib_wells = [f'A{i}' for i in range(1,7)]
                calib_wells.insert(0, 'Kinetic read')
                
                column_dict = {'A1': 60,
                               'A2': 45,
                               'A3': 30,
                               'A4': 20,
                               'A5': 10,
                               'A6': 0}
            
            no2_calib_dict = get_calib(df, data_type, calib_wells, column_dict)
            measured_no2_df = get_measured_df(df, 
                                              no2_calib_dict['value_name'], 
                                              no2_calib_dict['calib_wells'])
            
            return no2_calib_dict, measured_no2_df

    # The potentiostat data comes out as .txt files with tab separated columns
    elif data_type == "EChem":
        df_list = []
        for f in files:
            df = pd.read_csv(files[0], delimiter="\t")
            df["lead"] = f[-6:-4]
            df["chamber"] = [f"Chamber {l}" for l in df["lead"].values]
            df = df[["lead", "time (s)", "charge (C)", "current (A)"]]
            df_list.append(df)
        df = pd.concat(df_list)
        df["exp"] = exp_date

        return df

##### Define a function to remove data that should be excluded from analysis, e.g. known experimental errors

In [4]:
def remove_observations(df, chamber, exp):
    """
    Function to remove data that are inappropriate for analysis.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The dataframe that has some data that ought to be removed
    chamber : str
        The chamber id to remove
    exp : str
        The experiment date to remove
        
    Returns:
    --------
    cleaned_df : pandas DataFrame
        The dataframe without unwanted observations.
    """
    
    to_remove = df.loc[(df['chamber'] == chamber) & (df['exp'] == exp)]
    
    cleaned_df = df.iloc[df.index.difference(to_remove.index)]
    
    return cleaned_df

##### Define Functions necessary for processing CFU data

In [5]:
def correct_sampling_time(cfu_df, inoculation_ks):
    """
    Function to subtract the inoculation time from the time at which CFUs were sampled.
    This is done because the sampling time is relative to when the potentiostat was turned on,
    which is done the night before inoculation.
    
    Parameters:
    -----------
    cfu_df : pandas DataFrame
        The dataframe with the CFU data
    inoculation_ks: float
        The time, in kiloseconds, when the cells were inoculated. Time reported relative to when
        the potentiostat was turned on.
        
    Returns:
    --------
    cfu_df : pandas DataFrame
        The cfu_df, now with corrected sampling time
    """
    
    cfu_df['time (ks)'] = cfu_df['time (ks)'].values - inoculation_ks
    
    return cfu_df

def convert_ks_to_days(cfu_df):
    """
    Function to convert the kilosecond time scale to the daily time scale.
    
    Parameters:
    -----------
    cfu_df : pandas Dataframe
        The cfu_df with a corrected sampling time (i.e., returned by correct_sampling_time()).
    
    Returns:
    --------
    cfu_df : pandas Dataframe
        The cfu_df, now with a column of time in days.
    """
    
    # 1000 s in 1 ks, 60 s in 1 m, 60 m in 1 hr, 24 hr in 1 d
    cfu_df['time (days)'] = cfu_df['time (ks)'].values * 1000 / (60 * 60 * 24)
    
    return cfu_df

def scale_cell_density(cfu_df):
    """
    Function to scale cell density for ease of thinking.
    
    Parameters:
    -----------
    cfu_df : pandas DataFrame
        cfu_df as returned by convert_ks_to_days()
    
    Returns:
    --------
    cfu_df : pandas DataFrame
        cfu_df, now with a scaled CFU per mL per 1e9 column
    """
    
    cfu_df['scaled density (CFU per mL per 1e9)'] = cfu_df['density (CFU per mL)'].values / 1e9
    
    return cfu_df

def assign_timepoints(cfu_df):
    """
    Function to assign timepoints for each sampling time in a cfu_df. This is done so that
    different cfu_dfs, which have different exact sampling times, can be aligned.
    
    Parameters:
    -----------
    cfu_df : pandas DataFrame
        cfu_df, as returned by scale_cell_density()
    
    Returns:
    --------
    cfu_df : pandas DataFrame
        cfu_df, now with a timepoint column
    """
    
    times = sorted(cfu_df['time (days)'].unique())
    
    timepoint_dict = {t: i for i, t in enumerate(times)}
    cfu_df['timepoint'] = [timepoint_dict[t] for t in cfu_df['time (days)'].values]
    
    return cfu_df

def mean_sampling_time(cfu_df_list):
    """
    Function to determine the mean sampling time for each timepoint
    
    Parameters:
    -----------
    cfu_df_list : list of pandas DataFrames
        List of all cfu_dfs that should be analyzed together (i.e., from the same experimental procedure).
        Each item is a cfu_df, as returned by assign_timepoints().
        
    Returns:
    --------
    cfu_df_concat : pandas DataFrame
        cfu_df_concat, all the cfu data together in one dataframe. 
        Includes the column 'mean sampling time (days)', which helps align the different experiments
    """
    
    cfu_df_concat = pd.concat(cfu_df_list)
    
    grouped = cfu_df_concat.groupby(['timepoint', 'chamber'])
    
    mean_timepoint_dict = {}
    
    for g in grouped:
        
        mean_time = np.mean(g[1]['time (days)'].values)
        mean_timepoint_dict[g[0][0]] = mean_time
        
    cfu_df_concat['mean sampling time (days)'] = [mean_timepoint_dict[t] for t in cfu_df_concat['timepoint']]
    
    return cfu_df_concat

def average_cfu_tech_reps(cfu_df_concat):
    """
    Function to average the technical replicates from a given experiment
    so that biological replicates can be more easily compared
    
    Parameters:
    -----------
    cfu_df_concat : pandas DataFrame
        cfu_df_concat, as returned by mean_sampling_time()
    
    Returns: 
    --------
    cfu_bio_rep_master_df : pandas DataFrame
        Processed CFU data that can be plotted by cfu_plotter()
    """
    
    grouped = cfu_df_concat.groupby(['exp', 'timepoint', 'mean sampling time (days)', 'chamber', 'time (days)'])

    exps = []
    timepoints = []
    mean_times = []
    chambers = []
    real_times = []
    means = []

    for g in grouped:
        exps.append(g[0][0])
        timepoints.append(g[0][1])
        mean_times.append(g[0][2])
        chambers.append(g[0][3])
        real_times.append(g[0][4])
        means.append(np.mean(g[1]['scaled density (CFU per mL per 1e9)']))

    cfu_bio_rep_master_df = pd.DataFrame.from_dict({'exp': exps,
                                                    'timepoint': timepoints,
                                                    'mean sampling time (days)': mean_times,
                                                    'chamber': chambers,
                                                    'time (days)': real_times,
                                                    'mean scaled density (CFU per mL per 1e9)': means})
    
    return cfu_bio_rep_master_df

def assign_color_to_chamber(data_df, palette=bokeh.palettes.Colorblind6):
    """
    Function to assign colors to data points by chamber condition
    """
    
    data_df['color'] = [palette[int(ch[-1])] for ch in data_df['chamber'].values]
    
    return data_df

In [6]:
def cfu_plotter(
    cfu_df,
    palette,
    time_var="time (days)",
    y_type="log",
    title="Electrolysis Survival",
    error=False,
    height=400,
    width=700,
):
    """
    Function to plot CFUs over the duration of the experiment.
    
    Parameters:
    -----------
    cfu_df : pandas DataFrame
        The cfu_df as returned by scale_cell_density()
    
    palette : bokeh color palette
        The color palette to use for plotting
    
    time_var : str
        The time variable to use for plotting. Either 'time (days)' or 'time (ks)'
    
    y_type : str, default 'log'
        Type of scaling on the y-axis
    
    title : str, default 'Electrolysis Survival'
        Title to display over plot
        
    error : Bool, default False
        Determines whether function attempts to draw error bars. This requires the dataframe to have
        ymin and ymax values for the error bars for each data point.
        
    height : int, default 400
        Height of the plot
    
    width : int, default 700
        Width of the plot
        
    Returns:
    --------
    cfu_fig : bokeh figure object
        The resulting figure
    """

    cfu_fig = bokeh.plotting.figure(
        height=height,
        width=width,
        y_axis_type=y_type,
        y_axis_label="Scaled Density (CFU per mL per 1e9)",
        x_axis_label=time_var,
        title=title,
    )

    grouped = cfu_df.groupby(["exp", "chamber"])

    legend_items = []

    for i, g in enumerate(grouped):
        exp = g[0][0]
        ch = g[0][1]
        gdf = g[1]
        ch_index = int(ch[-1])
        if error:
            xs = [(x, x) for x in gdf[time_var]]
            ys = [(y1, y2) for y1, y2 in zip(gdf["ymin"], gdf["ymax"])]

            e = cfu_fig.multi_line(
                xs,
                ys,
                color=palette[i],
                line_width=2,
                muted_color=palette[i],
                muted_alpha=0,
            )
            
            l = cfu_fig.line(
                gdf[time_var],
                gdf["mean scaled density (CFU per mL per 1e9)"],
                color=palette[ch_index],
                line_width=2,
                muted_color=palette[ch_index],
                muted_alpha=0,
            )
            
            c = cfu_fig.circle(
                gdf[time_var],
                gdf["mean scaled density (CFU per mL per 1e9)"],
                color=palette[ch_index],
                size=7,
                fill_alpha=0,
                muted_color=palette[ch_index],
                muted_alpha=0,
            )

        else:
            c = cfu_fig.circle(
                gdf[time_var],
                gdf["mean scaled density (CFU per mL per 1e9)"],
                color=palette[ch_index],
                size=10,
                line_width=5,
                line_alpha=0.8,
                fill_alpha=0,
                muted_color=palette[ch_index],
                muted_alpha=0,
            )
            
            l = cfu_fig.line(
                gdf[time_var],
                gdf["mean scaled density (CFU per mL per 1e9)"],
                color=palette[ch_index],
                line_width=2,
                muted_color=palette[ch_index],
                muted_alpha=0,
            )
            
        if error:
            legend_items.append((f"{ch} {exp}", [l, c, e]))

        else:
            legend_items.append((f"{ch} {exp}", [l, c]))

    legend = Legend(items=legend_items)
    cfu_fig.add_layout(legend, "right")

    cfu_fig.legend.click_policy = "mute"

    cfu_fig.xaxis.axis_label_text_font_size = "16pt"
    cfu_fig.xaxis.major_label_text_font_size = "16pt"

    cfu_fig.yaxis.axis_label_text_font_size = "12pt"
    cfu_fig.yaxis.major_label_text_font_size = "16pt"

    cfu_fig.title.text_font_size = "16pt"

    return cfu_fig

In [7]:
def process_cfus(rel_path, date_list, exp_date_list, inoculation_time_list, palette=bokeh.palettes.Colorblind6):
    """
    Function to process all the CFU data into a plottable form
    
    Parameters:
    -----------
    rel_path : str
        The relative path to follow to find the data
    date_list : list of str
        The dates on which the experiments began in the format YYYY_MM_DD.
        Used to find files
    exp_date_list : list of str
        The date that I want to use to label the data. In the case of the
        potentiostat data, this is different from the date on which the 
        potentiostat was started because that is a day before inoculation.
        I.e., if the all the CFUs, ATP measurements, etc. begin on 2019_10_20,
        the potentiostat began on 2019_10_19, but I want to label these data
        with the same date. Format YYYY_MM_DD.
    inoculation_time_list : list of float
        List of inoculation times to input into correct_sampling_time()
        
    Returns:
    --------
    cfu_bio_rep_master_df : pandas DataFrame
        Processed CFU data as returned by average_cfu_tech_reps()
    """
    
    cfu_df_list = []
    
    for d, ed, it in zip(date_list, exp_date_list, inoculation_time_list):
        cfu_df = get_data(rel_path, 'CFUs', d, ed)
        cfu_df = correct_sampling_time(cfu_df, it)
        cfu_df = convert_ks_to_days(cfu_df)
        cfu_df = scale_cell_density(cfu_df)
        cfu_df = assign_timepoints(cfu_df)
        
        cfu_df_list.append(cfu_df)
    
    cfu_df_concat = mean_sampling_time(cfu_df_list)
    
    cfu_bio_rep_master_df = average_cfu_tech_reps(cfu_df_concat)
    cfu_bio_rep_master_df = assign_color_to_chamber(cfu_bio_rep_master_df, palette)
    
    return cfu_bio_rep_master_df

##### The inoculation times for 2019_10_20, 2019_11_11, and 2019_12_02 (the experiments I am currently analyzing)

2019_08_28: 13.51 ks

2019_10_07: 85.75 ks

2019_10_20: 68.6 ks

2019_11_11: 78.8 ks

2019_12_02: 87.0 ks

In [8]:
date_list = ['2019_08_28', '2019_10_07', '2019_10_20', '2019_11_11', '2019_12_02']
exp_date_list = ['2019_08_28', '2019_10_07', '2019_10_20', '2019_11_11', '2019_12_02']
inoculation_time_list = [13.51, 85.75, 68.6, 78.8, 87.0]

In [9]:
cfu_bio_rep_master_df = process_cfus('./data/', date_list, exp_date_list, inoculation_time_list)

##### The chamber 5 from 2019_12_02 ended up being abiotic because I spilled a volume of cells on accident, so I am removing it from the data set

In [10]:
to_remove = cfu_bio_rep_master_df.loc[(cfu_bio_rep_master_df['chamber'] == 'Chamber 5') & (cfu_bio_rep_master_df['exp'] == '2019_12_02')]
to_remove

Unnamed: 0,exp,timepoint,mean sampling time (days),chamber,time (days),mean scaled density (CFU per mL per 1e9),color
149,2019_12_02,0,0.003565,Chamber 5,0.003472,0.0,#D55E00
155,2019_12_02,1,0.171736,Chamber 5,0.107639,3.266667e-05,#D55E00
161,2019_12_02,2,0.745833,Chamber 5,0.802083,8e-06,#D55E00
167,2019_12_02,3,1.892454,Chamber 5,1.891204,1.833333e-06,#D55E00
173,2019_12_02,4,2.955185,Chamber 5,2.978009,6.666667e-08,#D55E00
179,2019_12_02,5,3.887593,Chamber 5,3.91088,6.666667e-07,#D55E00


In [11]:
cfu_analyzed = cfu_bio_rep_master_df.iloc[cfu_bio_rep_master_df.index.difference(to_remove.index)]

In [12]:
p = cfu_plotter(cfu_analyzed, palette, height=700, width=800)
bokeh.io.show(p)

In [13]:
# def get_ATP_df(date, path='../../../data/PlateReader/ATP'):
    
#     # Get data
#     path_to_data = os.path.join(path, f'{date}_EChemSurvival_ATP.csv')
#     df = pd.read_csv(path_to_data)
#     df['exp'] = date
        
#     return df

def get_calib(df, data_type, calib_wells, column_dict, verbose=True):
    """
    Function to extract calibration data from the ATP_df or NO2_df
    that was read from the plate-reader data
    
    Parameters:
    -----------
    df : pandas DataFrame
        The DataFrame object created while running get_data()
        with an ATP or NO2 data_type parameter
    data_type : str
        'ATP' or 'NO2', given as a parameter to get_data()
    verbose: Boolean
        Determines whether to print out the linear regression
        stats when the function is called
    
    Returns:
    --------
    calib_dict : dictionary
        items: ['slope': the slope of the linear regression (float),
                'intercept': the intercept of the linear regression (float),
                'r': the r value of the linear regression (float),
                'p-value': the p-value of the linear regression (float),
                'stderr': the standard error of the linear regression (float),
                'plot': the Bokeh plot of the calibration points and linear regression.]
    """
    
    date = df['exp'].unique()
    
    assert(
        len(date) == 1
          ), f'You are trying to calibrate over more than one experiment, which is wrong. Experiments: {date}'
    
    date = date[0]
    
    # Extract calibration data
    if data_type == 'ATP':
#         calib_wells = ['A{}'.format(i) for i in range(1,9)]
#         calib_wells.insert(0, 'Kinetic read')
        calib_df = df[calib_wells]
#         column_dict = {'A1': 5000, 
#                        'A2': 2500,
#                        'A3': 500,
#                        'A4': 250,
#                        'A5': 50,
#                        'A6': 25,
#                        'A7': 5,
#                        'A8': 0}
        
        var_name = 'ATP (nM)' 
        value_name = 'luminescence (AU)'
    
    elif data_type == 'Nitrite':
#         calib_wells = [f'A{i}' for i in range(1,7)]
#         calib_wells.insert(0, 'Kinetic read')
        calib_df = df[calib_wells]
#         column_dict = {'A1': 60,
#                        'A2': 45,
#                        'A3': 30,
#                        'A4': 20,
#                        'A5': 10,
#                        'A6': 0}
        
        var_name = 'NO2 (uM)'
        value_name = 'absorbance (540 nm)'
    
    # Assign concentrations to wells in nM
    calib_df = calib_df.rename(columns=column_dict)

    # Make tidy
    calib_df = calib_df.melt(id_vars='Kinetic read', var_name=var_name, value_name=value_name)
    
    # Get the maximum luminescence associated with each concentration
    calib_grouped = calib_df.groupby(var_name)
    concs = []
    max_vals = []

    for cg in calib_grouped:
        conc = cg[0]
        df = cg[1]

        val = df[value_name].values
        max_val = np.max(val)

        concs.append(conc)
        max_vals.append(max_val)
        
    
    # Plot to see if calibration is reasonable
    cal_plot = bokeh.plotting.figure(width=600,
                                     height=400,
                                     x_axis_label=var_name,
                                     y_axis_label=value_name,
                                     title=f'{data_type} cal {date}')

    cal_plot.circle(concs, max_vals)
    
    # Get the linear regression
    slope, inter, r, pval, stderr = st.linregress(concs, max_vals)
    
    cal_plot.line(np.array(concs), 
                      np.array(concs) * slope + inter, 
                      color='orange',
                      line_width=2)
    
#     label = bokeh.models.Label(text=f"""
#         ATP calibration curve linear regression
#         ---------------------------------------
#         Slope: {atp_slope}
#         Intercept: {atp_int}
#         R^2: {atp_r ** 2}
#         ---------------------------------------
#         """,
#                               x=600,
#                               y=0)
    
#     atp_cal_plot.add_layout(label)
    
    if verbose:
        print(f"""
        {data_type} calibration curve linear regression
        ---------------------------------------
        Slope: {slope}
        Intercept: {inter}
        R^2: {r ** 2}
        ---------------------------------------
        """)
    
    calib_dict = {'slope': slope,
                  'intercept': inter,
                  'r': r,
                  'p-value': pval,
                  'stderr': stderr,
                  'calib_df': calib_df,
                  'plot': cal_plot,
                  'value_name': value_name,
                  'var_name': var_name,
                  'calib_wells': calib_wells}
    
    return calib_dict
    
def lin_reg_inference(slope, intercept, measured_y):
    """
    Function to infer the x-value corresond to the measured
    y-value, based on the linear regression of a calibration
    curve.
    
    Parameters:
    -----------
    slope : float
        The slope of the linear regression of a calibration curve
    intercept : float
        The intercept of the linear regression of a calibration curve
    measured_y : float
        The y_value that was measured
    
    Returns:
    --------
    (measured_y - intercept) / slope : float
        The inferred x-value
    """

    return (measured_y - intercept) / slope

def get_measured_df(df, value_name, calib_wells):
    """
    Function to extract a DataFrame of only the experimental values from
    a platereader ATP luciferase assay or the NO2 Greiss assy.
    
    Parameters:
    -----------
    df : pandas DataFrame
        The full pandas DataFrame as created by get_data() when given
        the ATP or NO2 data_type parameter
    calib_wells : list of str, default ['A{}'.format(i) for i in range(1,9)]
        The well IDs, which are column headers in ATP_df, that correspond
        to the calibration points
    
    Returns:
    --------
    measured_df : pandas DataFrame
        A tidy dataframe with only the data corresponding to experimental
        samples
    """
    
    # Disregard the data corresponding to the calibration wells,
    # but keep the Kinetic Read column
    measured_df = df[df.columns.difference(calib_wells[1:])]
    
    # Begin tidying
    measured_df = measured_df.melt(id_vars=['exp', 'Kinetic read'], var_name='chamber', value_name=value_name)
    
    # Separate row and column IDs
    measured_df['row'] = [s[0] for s in measured_df['chamber'].values]
    measured_df['column'] = [s[1:] for s in measured_df['chamber'].values]
    
    # Assign chamber IDs, accounting for whether samples were in
    # left or right half of the 96-well plate
    if '1' in measured_df['column'].values:
        measured_df['chamber'] = [f'Chamber {int(s[1:]) - 1}' for s in measured_df['chamber'].values]
        
    else:
        measured_df['chamber'] = [f'Chamber {int(s[1:]) - 7}' for s in measured_df['chamber'].values]
    
    
    # Only interested in the max luminescence intensity for each well
    measured_grouped = measured_df.groupby(['chamber', 'row'])
    
    rows = []
    chambers = []
    measured_max_vals = []

    for mg in measured_grouped:
        chamber, row = mg[0]
        df = mg[1]

        vals = df[value_name].values
        measured_max_vals.append(np.max(vals))
        chambers.append(chamber)
        rows.append(row)
    
    
    measured_df = pd.DataFrame.from_dict({'chamber': chambers,
                                          'row': rows,
                                          f'max {value_name}': measured_max_vals})
    
    measured_df['exp'] = df['exp'].values[0]
    
    return measured_df
        
def infer_concs(measured_df, slope, intercept, data_type, val_name, dilution_factor=1, scaling_factor=1):
    """
    Function to infer the ATP concentration from measured
    luminescence intensities or NO2 concentration from measured
    absorbance
    
    Parameters:
    -----------
    measured_ATP_df : pandas DataFrame
        The measured_ATP_df as returned by assign_sample_time_to_ATP_df()
    slope : float
        The slope from the calib_dict returned by get_calib()
    intercept : float
        The int from the calib_dict returned by get_calib()
        
    Returns:
    --------
    measured_df : pandas DataFrame
        measured_df, now with ATP concentration measurements
    """
    
    vals = measured_df[val_name].values
    
    # Samples were diluted 1:50 when taken from chambers (20 uL in 1 mL), but so were the standards, so no correction needed
    inferred_vals = lin_reg_inference(slope, intercept, vals)
    undiluted_inferred_vals = inferred_vals * dilution_factor
    
    if data_type == 'ATP':
        measured_df['ATP (nM)'] = undiluted_inferred_vals
        # Chambers have 100 mL
        measured_df['ATP (nmol)'] = measured_df['ATP (nM)'].values * 0.1
        
    if data_type == 'Nitrite':
        measured_df['NO2 (uM)'] = undiluted_inferred_vals
        measured_df['NO2 (mM)'] = undiluted_inferred_vals / scaling_factor
    
    return measured_df

def assign_sample_time_to_df(measured_df, cfu_df):
    """
    Function to align the sampling times across the CFU and ATP data
    
    Parameters:
    -----------
    measured_atp_df : pandas DataFrame
        The dataframe of ATP data as returned by infer_atp_concs()
    
    cfu_df : pandas DataFrame
        The dataframe of ATP data as returned by process_cfus()
    
    Returns:
    --------
    measured_atp_df : pandas DataFrame
        measured_atp_df, now with sampling times corresponding to the cfu_df
    """
    
    assert(
        len(measured_df) == len(cfu_df)
    ), 'You are trying to align dataframes of two different lengths'
    
    # Ensure that all the observations are in the same order
    measured_df = measured_df.sort_values(['exp', 'chamber', 'row'])
    cfu_df = cfu_df.sort_values(['exp', 'chamber', 'timepoint'])
    
    
    measured_df['time (days)'] = cfu_df['time (days)'].values
    measured_df['mean sampling time (days)'] = cfu_df['mean sampling time (days)'].values
    
#     cfu_df = cfu_df.loc[all_exps_cfu_mean_df['exp'].isin(measured_atp_df['exp'])]
    
#     grouped_ATP = measured_atp_df.groupby(['exp', 'chamber', 'row'])
#     grouped_cfu = cfu_df.groupby(['exp', 'chamber', 'time (days)'])
    
#     ATP_mini_dfs = []
    
#     for ga, gc in zip(grouped_ATP, grouped_cfu):
        
#         atp_gdf = ga[1].sort_values('row')
#         cfu_gdf = gc[1].sort_values('time (days)')
        
#         atp_gdf['time (days)'] = cfu_gdf['time (days)'].values
        
#         ATP_mini_dfs.append(atp_gdf)
        
    
#     measured_atp_df = pd.concat(ATP_mini_dfs)
    
    return measured_df

def normalize_atp_to_cell_density(measured_atp_df, cfu_df):
    """
    Function to normalize ATP concentration measurements to cell density
    
    Parameters:
    -----------
    measured_atp_df : pandas DataFrame
        measured_atp_df as returned by assign_sample_time_to_ATP_df()
    
    cfu_df : pandas DataFrame
        The dataframe of CFU data that was used in assign_sample_time_to_ATP_df()
    
    Returns:
    --------
    concat_cfu_atp_df : pandas DataFrame
        A horizontally concatenated dataframe including all the CFU and ATP data, including
        a normalized ATP column
    """
    
    # Make sure that all the observations are sorted in the same way
#     measured_atp_df = measured_atp_df.sort_values(by=['exp', 'chamber', 'row'])
#     cfu_df = cfu_df.sort_values(by=['exp', 'chamber', 'timepoint'])
# #     pdb.set_trace()
#     # Reset indices to make sure that the concatenation works
#     measured_atp_df = measured_atp_df.reset_index(drop=True)
#     cfu_df = cfu_df.reset_index(drop=True)
    
#     # Concatenate horizontally and remove duplicate columns
#     concat_cfu_atp_df = pd.concat([cfu_df, measured_atp_df], axis=1)
    
    
#     concat_cfu_atp_df = concat_cfu_atp_df[concat_cfu_atp_df.columns.difference(['row'])]

    cfu_atp_df = cfu_df.merge(measured_atp_df, on=['exp', 'time (days)', 'chamber'])
    # cfu_atp_df = cfu_atp_df.T.drop_duplicates().T
    # cfu_atp_df = cfu_atp_df[cfu_atp_df.columns.difference(['row'])]
    
    # Normalize
    cfu_atp_df['normalized ATP (nmol / CFU / 1e9)'] = (cfu_atp_df['ATP (nmol)'].values / (cfu_atp_df['mean scaled density (CFU per mL per 1e9)'].values * 100))
    
    return cfu_atp_df

def process_atp_no2(rel_path, date_list, exp_date_list, cfu_master_df):
    """
    Function to process all the ATP data into a plottable form
    
    Parameters:
    -----------
    rel_path : str
        The relative path to follow to find the data
    date_list : list of str
        The dates on which the experiments began in the format YYYY_MM_DD.
        Used to find files
    exp_date_list : list of str
        The date that I want to use to label the data. In the case of the
        potentiostat data, this is different from the date on which the 
        potentiostat was started because that is a day before inoculation.
        I.e., if the all the CFUs, ATP measurements, etc. begin on 2019_10_20,
        the potentiostat began on 2019_10_19, but I want to label these data
        with the same date. Format YYYY_MM_DD.
    cfu_master_df : pandas DataFrame
        The dataframe of CFU data as returned by process_cfus()
        
    Returns:
    --------
    cfu_atp_master_df : pandas DataFrame
        Processed ATP data as returned by normalize_atp_to_cell_density()
    """
    
    cfu_atp_no2_df_list = []
    
    for d, ed, it in zip(date_list, exp_date_list, inoculation_time_list):

        atp_calib_df, measured_atp_df = get_data(rel_path, 'ATP', d, ed)
        atp_slope = atp_calib_df['slope']
        atp_int = atp_calib_df['intercept']
        cfu_df = cfu_master_df.loc[cfu_master_df['exp'] == ed]
        measured_atp_df = infer_concs(measured_atp_df, 
                                      atp_slope, 
                                      atp_int, 
                                      'ATP', 
                                      'max luminescence (AU)', 
                                      dilution_factor=1, 
                                      scaling_factor=1)
        
        no2_calib_df, measured_no2_df = get_data(rel_path, 'Nitrite', d, ed)
        no2_slope = no2_calib_df['slope']
        no2_int = no2_calib_df['intercept']
        cfu_df = cfu_master_df.loc[cfu_master_df['exp'] == ed]
        measured_no2_df = infer_concs(measured_no2_df, 
                                      no2_slope, 
                                      no2_int, 
                                      'Nitrite', 
                                      'max absorbance (540 nm)', 
                                      dilution_factor=200, 
                                      scaling_factor=1000)
        
        # When I requantified the samples from 2019_10_20 because my Bactiter Glo
        # was too old when I did it the first time, I used row B to run samples
        # from 2019_10_07-2019_10_11 to verify that the requanitfication was valid.
        # I need to remove this row from the analysis, however.
        
        # Likewise, when I ran the experiment from 2019_12_02-2019_12_06, Chamber 5
        # ended up being abiotic. I also need to exclude it from the analysis.
        
        if ed == '2019_10_20':
            measured_atp_df = measured_atp_df.loc[measured_atp_df['row'] != 'B']
            
        elif ed == '2019_12_02':
            measured_atp_df = measured_atp_df.loc[measured_atp_df['chamber'] != 'Chamber 5']
            measured_no2_df = measured_no2_df.loc[measured_no2_df['chamber'] != 'Chamber 5']
            cfu_df = cfu_df.loc[cfu_df['chamber'] != 'Chamber 5']
        
#         measured_atp_df = measured_atp_df[measured_atp_df.columns.difference(['row'])]
#         measured_no2_df = measured_no2_df[measured_no2_df.columns.difference(['row'])]
        
        measured_atp_df = assign_sample_time_to_df(measured_atp_df, cfu_df)
        measured_no2_df = assign_sample_time_to_df(measured_no2_df, cfu_df)
        
        cfu_atp_df = normalize_atp_to_cell_density(measured_atp_df, cfu_df)
        
        cfu_atp_no2_df = cfu_atp_df.merge(measured_no2_df, on=['exp', 'chamber', 'time (days)'])
        
        # concat_cfu_atp_no2_df = pd.concat([concat_cfu_atp_df, measured_no2_df], axis=1)
#         concat_cfu_atp_no2_df = concat_cfu_atp_no2_df.T.drop_duplicates().T
        
#         concat_cfu_atp_no2_df = concat_cfu_atp_no2_df[concat_cfu_atp_no2_df.columns.difference(['row'])]
        
        cfu_atp_no2_df_list.append(cfu_atp_no2_df)
    
    cfu_atp_no2_master_df = pd.concat(cfu_atp_no2_df_list)
    
    return cfu_atp_no2_master_df

In [14]:
cfu_atp_no2_master_df = process_atp_no2('./data/', date_list, exp_date_list, cfu_bio_rep_master_df)


        ATP calibration curve linear regression
        ---------------------------------------
        Slope: 30.236792050511013
        Intercept: 563.190277405407
        R^2: 0.9997851976864303
        ---------------------------------------
        

        Nitrite calibration curve linear regression
        ---------------------------------------
        Slope: 0.050128643216080404
        Intercept: 0.13446231155778876
        R^2: 0.9991076565801764
        ---------------------------------------
        

        ATP calibration curve linear regression
        ---------------------------------------
        Slope: 30.26151068903325
        Intercept: 305.4519950441281
        R^2: 0.9999768440782568
        ---------------------------------------
        

        Nitrite calibration curve linear regression
        ---------------------------------------
        Slope: 0.04692060301507536
        Intercept: 0.18468341708542746
        R^2: 0.9993080786230475
        --------

In [15]:
cfu_atp_no2_master_df.head()

Unnamed: 0,exp,timepoint,mean sampling time (days)_x,chamber,time (days),mean scaled density (CFU per mL per 1e9),color,row_x,max luminescence (AU),ATP (nM),ATP (nmol),mean sampling time (days)_y,normalized ATP (nmol / CFU / 1e9),row_y,max absorbance (540 nm),NO2 (uM),NO2 (mM),mean sampling time (days)
0,2019_08_28,0,0.003565,Chamber 0,0.007755,0.78,#0072B2,B,43146,1408.311095,140.83111,0.003565,1.805527,C,0.238,413.087935,0.413088,0.003565
1,2019_08_28,0,0.003565,Chamber 1,0.007755,0.773333,#E69F00,B,39061,1273.210784,127.321078,0.003565,1.646393,C,0.214,317.334296,0.317334,0.003565
2,2019_08_28,0,0.003565,Chamber 2,0.007755,0.88,#F0E442,B,52051,1702.819851,170.281985,0.003565,1.935023,C,0.24,421.067404,0.421067,0.003565
3,2019_08_28,0,0.003565,Chamber 3,0.007755,0.79,#009E73,B,47943,1566.958877,156.695888,0.003565,1.983492,C,0.252,468.944224,0.468944,0.003565
4,2019_08_28,0,0.003565,Chamber 4,0.007755,0.843333,#56B4E9,B,52601,1721.009611,172.100961,0.003565,2.040723,C,0.28,580.656803,0.580657,0.003565


In [16]:
df_for_paper = cfu_atp_no2_master_df.loc[cfu_atp_no2_master_df['chamber'].isin(['Chamber 0', 'Chamber 1'])]

In [17]:

def plot_grids(df, y, y_range=(1e-5, 1e1), semilog=True, legend_position='bottom_left'):
    plots = []
    
    condition_dict = {
        "Chamber 0": "PCA + electrode",
        "Chamber 1": "electrode",
        "Chamber 2": "PCA + acetate + electrode",
        "Chamber 3": "acetate + electrode",
        "Chamber 4": "PCA + acetate",
        "Chamber 5": "acetate"
    }

    if semilog:
        plot = bokeh.plotting.figure(width=400, 
                                         height=300, 
                                         y_axis_type='log', 
                                         y_axis_label=y,
                                         x_axis_label='Time (days)'
                                         # title=condition_dict[chamber],
                                        )
    else:
        plot = bokeh.plotting.figure(width=400, 
                                         height=300, 
                                         y_axis_label=y,
                                         x_axis_label='Time (days)'
                                         # y_axis_type='log', 
                                         # title=condition_dict[chamber],
                                        )
    grouped = df.groupby(['chamber'])
    palette = [bokeh.palettes.Colorblind4[3], 'gainsboro']
    for g in grouped:
        chamber = g[0]
        


        plot.circle(g[1]['time (days)'], g[1][y], color=palette[int(chamber[-1])], line_color='black', size=7, alpha=0.7, line_alpha=1, legend_label=condition_dict[chamber])
        
        exp_grouped = g[1].groupby('exp')
        for eg in exp_grouped:
            plot.line(eg[1]['time (days)'], eg[1][y], line_color=palette[int(chamber[-1])], width=2, alpha=0.7)

        plot.y_range = Range1d(y_range[0], y_range[1])
        plot.xaxis.major_label_text_font_size = '12pt'
        plot.yaxis.major_label_text_font_size = '12pt'
        plot.title.text_font_size = '12pt'
        plot.legend.location = legend_position
        
        plot.output_backend = 'svg'
        
        plots.append(plot)


    return plots

In [18]:
cfu_plots = plot_grids(df_for_paper, 'mean scaled density (CFU per mL per 1e9)')
cfu_grid = bokeh.layouts.gridplot(cfu_plots, ncols=2)
bokeh.io.export_svg(cfu_grid, filename='./plots/cfu_gridplot.svg')
bokeh.io.show(cfu_grid)

  for g in grouped:
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p4551', ...)
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p4551', ...)


In [19]:
atp_plots = plot_grids(df_for_paper, 'normalized ATP (nmol / CFU / 1e9)', y_range=(1e-3, 3))
atp_grid = bokeh.layouts.gridplot(atp_plots, ncols=2)
bokeh.io.export_svg(atp_grid, filename='./plots/atp_gridplot.svg')
bokeh.io.show(atp_grid)

  for g in grouped:
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p6098', ...)
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p6098', ...)


In [20]:
no2_plots = plot_grids(df_for_paper, 'NO2 (mM)', y_range=(-1, 12), semilog=False, legend_position='bottom_right')
no2_grid = bokeh.layouts.gridplot(no2_plots, ncols=2)
bokeh.io.export_svg(no2_grid, filename='./plots/no2_gridplot.svg')
bokeh.io.show(no2_grid)

  for g in grouped:
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p7714', ...)
ERROR:bokeh.core.validation.check:E-1027 (REPEATED_LAYOUT_CHILD): The same model can't be used multiple times in a layout: GridPlot(id='p7714', ...)


In [21]:
t2 = df_for_paper.loc[df_for_paper['timepoint'] == 2]

In [22]:
t2

Unnamed: 0,exp,timepoint,mean sampling time (days)_x,chamber,time (days),mean scaled density (CFU per mL per 1e9),color,row_x,max luminescence (AU),ATP (nM),ATP (nmol),mean sampling time (days)_y,normalized ATP (nmol / CFU / 1e9),row_y,max absorbance (540 nm),NO2 (uM),NO2 (mM),mean sampling time (days)
12,2019_08_28,2,0.745833,Chamber 0,0.721644,0.996667,#0072B2,D,10885,341.3659,34.13659,0.745833,0.342508,E,1.061,3696.639801,3.69664,0.745833
13,2019_08_28,2,0.745833,Chamber 1,0.721644,1.036667,#E69F00,D,10137,316.627826,31.662783,0.745833,0.305429,E,0.796,2639.360038,2.63936,0.745833
12,2019_10_07,2,0.745833,Chamber 0,0.70544,1.25,#0072B2,D,29265,956.976283,95.697628,0.745833,0.765581,D,1.199,4323.544532,4.323545,0.745833
13,2019_10_07,2,0.745833,Chamber 1,0.70544,1.07,#E69F00,D,22275,725.989797,72.59898,0.745833,0.678495,D,0.854,2852.975196,2.852975,0.745833
12,2019_10_20,2,0.745833,Chamber 0,0.741898,1.096667,#0072B2,E,31979,981.733922,98.173392,0.745833,0.895198,D,1.219,4408.794928,4.408795,0.745833
13,2019_10_20,2,0.745833,Chamber 1,0.741898,1.106667,#E69F00,E,16994,515.468892,51.546889,0.745833,0.465785,D,0.919,3130.038984,3.130039,0.745833
12,2019_11_11,2,0.745833,Chamber 0,0.758102,1.313333,#0072B2,D,42186,1371.090054,137.109005,0.745833,1.043977,D,1.704,6187.613002,6.187613,0.745833
13,2019_11_11,2,0.745833,Chamber 1,0.758102,0.936667,#E69F00,D,28315,912.755371,91.275537,0.745833,0.974472,D,1.193,4144.762747,4.144763,0.745833
10,2019_12_02,2,0.745833,Chamber 0,0.802083,1.18,#0072B2,D,23443,490.250043,49.025004,0.745833,0.415466,D,1.285,4524.548891,4.524549,0.745833
11,2019_12_02,2,0.745833,Chamber 1,0.802083,1.18,#E69F00,D,21018,437.746793,43.774679,0.745833,0.370972,D,1.086,3731.164269,3.731164,0.745833


In [23]:
bootstrap_efron_tibshirani(t2.loc[t2['chamber'] == 'Chamber 0']['NO2 (mM)'].values, t2.loc[t2['chamber'] == 'Chamber 1']['NO2 (mM)'].values) * 6

0.018029999999999997

In [42]:
def bootstrap_survival_exps(df, measurement, reps=1000000):
    """
    Function that returns bonferroni-corrected p-value for each timepoint for a given metric.
    """
    
    timepoint_bonferroni_dict = {}
    
    timepoint_grouped = df.groupby('timepoint')
    
    for tg in timepoint_grouped:
        tp = tg[0]
        
        set1 = tg[1].loc[tg[1]['chamber'] == 'Chamber 0'][measurement].values
        set2 = tg[1].loc[tg[1]['chamber'] == 'Chamber 1'][measurement].values

        p = bootstrap_efron_tibshirani(set1, set2, reps)
        bonferroni = p * len(timepoint_grouped)
        
        timepoint_bonferroni_dict[tp] = (p, bonferroni)
        
    return timepoint_bonferroni_dict

The first number is the raw p-value, and the second is the Bonferroni-corrected p-value. I consider significant only those for which the Bonferroni-corrected p-value is less than 0.05.

In [43]:
bootstrap_survival_exps(df_for_paper, 'NO2 (mM)')

{0: (0.880681, 5.284086),
 1: (0.020743, 0.12445800000000001),
 2: (0.00293, 0.01758),
 3: (0.001917, 0.011502),
 4: (0.977547, 5.8652820000000006),
 5: (0.339822, 2.038932)}

In [44]:
bootstrap_survival_exps(df_for_paper, 'normalized ATP (nmol / CFU / 1e9)')

{0: (0.739246, 4.4354759999999995),
 1: (0.855634, 5.133804),
 2: (0.420811, 2.524866),
 3: (0.033526, 0.201156),
 4: (0.051827, 0.31096199999999996),
 5: (0.568805, 3.41283)}

In [45]:
bootstrap_survival_exps(df_for_paper, 'mean scaled density (CFU per mL per 1e9)')

{0: (0.542013, 3.252078),
 1: (0.40362, 2.4217199999999997),
 2: (0.100727, 0.604362),
 3: (0.120195, 0.72117),
 4: (0.338699, 2.0321939999999996),
 5: (0.002543, 0.015258)}

In [28]:
0.015/ 6

0.0025

#### Getting current traces

In [29]:
def current_plotter(df, palette, title='Current during electrolysis', show_legend=True, width=400, height=300):
    
    condition_dict = {
        0: "PCA + electrode",
        1: "electrode",
        2: "PCA + acetate + electrode",
        3: "acetate + electrode",
        4: "PCA + acetate",
        5: "acetate"
    }
    
    plots = []

    chamber_grouped = df.groupby(['chamber'])

    legend_items = []
    
    current_fig = bokeh.plotting.figure(height=height,
                                    width=width,
                                    # y_axis_type='log',
                                    y_axis_label='Current (µA)',
                                    x_axis_label='Time (days)',
                                    # title=condition_dict[int(ch)]
                                   )

    for i, g in enumerate(chamber_grouped):
        
        
        
        # exp = g[0][0]
        ch = g[0]
        gdf = g[1]
        

        
        exp_grouped = gdf.groupby(['exp'])
        
        for eg in exp_grouped:
            
            e_df = eg[1]

            l = current_fig.line(e_df['time (days)'], 
                                 e_df['current (uA)'], 
                                 color=palette[int(ch)], 
                                 alpha=0.7, 
                                 line_width=3, 
                                 muted_color=palette[int(ch)],
                                 muted_alpha=0.1,
                                 legend_label=condition_dict[int(ch)])
            
        current_fig.output_backend = 'svg'
        current_fig.xaxis.major_label_text_font_size = '12pt'
        current_fig.yaxis.major_label_text_font_size = '12pt'
        current_fig.xaxis.axis_label_text_font_size = '12pt'
        current_fig.yaxis.axis_label_text_font_size = '12pt'
        current_fig.title.text_font_size = '12pt'
        
        current_fig.y_range = Range1d(-10, 2.5e2)
        
        
        plots.append(current_fig)
        
        
#         legend_items.append((f'Chamber {int(ch)} {exp}', [l]))
        
#         if show_legend:
#             legend = Legend(items=legend_items)

#             current_fig.add_layout(legend, 'right')
#             current_fig.legend.click_policy = 'mute'

#         else:
#             pass


    
    return plots

In [30]:
def get_potentiostat_df(date, inoculation_ks, path='./data/EChem'):
    
    dfs = []
    
    for p in ['p0', 'p1', 'p2', 'p3']:
        path_to_data = os.path.join(path, f'{date}_electrolysis_survival_{p}.txt')
        df = pd.read_csv(path_to_data, delimiter='\t')
        df['chamber'] = p[1]
        df['exp'] = date
        
        dfs.append(df)
    
    poten_df = pd.concat(dfs)
    poten_df = poten_df[['exp', 'chamber', 'time (s)', 'charge (C)', 'current (A)']]
    
    poten_df['time (s)'] = poten_df['time (s)'] - inoculation_ks * 1000
    
    poten_df['time (days)'] = poten_df['time (s)'] / (60 * 60 * 24)
    poten_df['current (uA)'] = poten_df['current (A)'] * 1e6

        
        
    return poten_df

def get_spiking_potentiostat_df(date, inoculation_ks, path='./data/EChem'):
    
    dfs = []
    
    for p in ['p0', 'p1', 'p2', 'p3']:
        path_to_data = os.path.join(path, f'{date}_EChemSurvival_Spiking_{p}.txt')
        df = pd.read_csv(path_to_data, delimiter='\t')
        df['chamber'] = p[1]
        df['exp'] = date
        
        dfs.append(df)
    
    poten_df = pd.concat(dfs)
    poten_df = poten_df[['exp', 'chamber', 'time (s)', 'charge (C)', 'current (A)']]
    
    poten_df['time (s)'] = poten_df['time (s)'] - inoculation_ks * 1000
    
    poten_df['time (days)'] = poten_df['time (s)'] / (60 * 60 * 24)
    poten_df['current (uA)'] = poten_df['current (A)'] * 1e6

        
        
    return poten_df

In [31]:
test = get_potentiostat_df('2019_08_28', inoculation_time_list[0])
test.head()

Unnamed: 0,exp,chamber,time (s),charge (C),current (A),time (days),current (uA)
0,2019_08_28,0,-13510.0,0.007352,0.00147,-0.156366,1470.0
1,2019_08_28,0,-13505.0,0.01188,0.000906,-0.156308,905.8
2,2019_08_28,0,-13500.0,0.003162,0.001744,-0.15625,1744.0
3,2019_08_28,0,-13495.0,-0.002038,0.00104,-0.156192,1040.0
4,2019_08_28,0,-13490.0,-0.005598,0.000712,-0.156134,712.1


In [32]:
test_plot = current_plotter(test.loc[test['time (days)'] >= 0][::200], bokeh.palettes.Colorblind6)
bokeh.io.show(test_plot[0])

  for i, g in enumerate(chamber_grouped):
  for eg in exp_grouped:
  for eg in exp_grouped:
  for eg in exp_grouped:
  for eg in exp_grouped:


In [33]:
# The potentiostat files are usually dated a day earlier than the CFU, ATP, and NO2 files
# because the potentiostat was set to pre-reduce the PCA a day earlier

potentiostat_dates = ['2019_08_28', '2019_10_06', '2019_10_19', '2019_11_10', '2019_12_01']

poten_dfs = []

for i, d in enumerate(potentiostat_dates):
    
    p_df = get_potentiostat_df(d, inoculation_time_list[i])
    p_df = p_df.loc[p_df['time (days)'] >= 0][::200]
    
    poten_dfs.append(p_df)

current_df = pd.concat(poten_dfs)
current_df.head()

Unnamed: 0,exp,chamber,time (s),charge (C),current (A),time (days),current (uA)
2702,2019_08_28,0,0.0,-0.01331,1e-06,0.0,1.184
2902,2019_08_28,0,1000.0,-0.02973,5e-05,0.011574,49.72
3102,2019_08_28,0,2000.0,-0.1191,0.000111,0.023148,110.8
3302,2019_08_28,0,3000.0,-0.2363,0.000119,0.034722,119.3
3502,2019_08_28,0,4000.0,-0.3549,0.000117,0.046296,117.2


In [34]:
current_plots = current_plotter(current_df.loc[current_df['chamber'].isin(['0', '1']) & (current_df['time (days)'] <= 4)], [bokeh.palettes.Colorblind4[3], 'gainsboro'], width=435, height=300)
bokeh.io.export_svg(current_plots[0], filename='./plots/current_plot.svg')
bokeh.io.show(current_plots[0])

  for i, g in enumerate(chamber_grouped):
  for eg in exp_grouped:
  for eg in exp_grouped:


In [35]:
def spike_current_plotter(df, palette, title='Current during electrolysis', show_legend=True, width=800, height=400, spike_times=np.array([])):
    
    current_fig = bokeh.plotting.figure(height=height,
                                width=width,
                                # y_axis_type='log',
                                y_axis_label='Current (µA)',
                                x_axis_label='Time (days)',
                                title=title)

    grouped = df.groupby(['exp', 'chamber'])

    legend_items = []

    for i, g in enumerate(grouped):
        exp = g[0][0]
        ch = g[0][1]
        gdf = g[1]

        l = current_fig.line(gdf['time (days)'], gdf['current (uA)'], color=palette[int(ch)], line_width=3, muted_color=palette[int(ch)], muted_alpha=0.1)
        
        if len(spike_times) > 0:
            spike_times = list(spike_times)
            ts = [(t * 1000 / (60 * 60 * 24), t * 1000 / (60 * 60 * 24)) for t in spike_times]
            ys = [(200, 220) for t in spike_times]
            
            mlt = current_fig.multi_line(ts, ys, color='black', line_width=3)
        
        legend_items.append((f'Chamber {int(ch)} {exp}', [l]))
    
    if show_legend:
        legend = Legend(items=legend_items)
        
        current_fig.add_layout(legend, 'right')
        current_fig.legend.click_policy = 'mute'
        
    else:
        pass
    

    
    return current_fig

In [36]:
spiking_df = get_spiking_potentiostat_df('2019_11_03', 99.8, path='./data/EChem')
spiking_df.head()

Unnamed: 0,exp,chamber,time (s),charge (C),current (A),time (days),current (uA)
0,2019_11_03,0,-99800.0,7.2e-05,1.4e-05,-1.155093,14.48
1,2019_11_03,0,-99795.0,0.000109,7e-06,-1.155035,7.384
2,2019_11_03,0,-99790.0,-0.003387,0.000699,-1.154977,699.2
3,2019_11_03,0,-99785.0,-0.005144,0.000351,-1.154919,351.4
4,2019_11_03,0,-99780.0,-0.006405,0.000252,-1.154861,252.3


In [37]:
spike_times = np.array([143, 186.2, 229.4, 272.6, 315.8, 359.1]) - 99.8

In [38]:
spiking_df_from_zero = spiking_df.loc[spiking_df['time (days)'] > 0]
spiking_plot = spike_current_plotter(spiking_df_from_zero[::200], bokeh.palettes.Colorblind4, title='Spiking', spike_times=spike_times)

In [39]:
spiking_plots = []

chamber_dict = {
    0: '0 mM nitrate',
    1: '5 mM nitrate',
    2: '5 mM nitrate, respike every 24 hours',
    3: '5 mM nitrate, respike every 12 hours'
}

for ch in [0, 1, 2, 3]:
    df = spiking_df_from_zero.loc[spiking_df_from_zero['chamber'] == str(ch)]
    if ch not in [2, 3]:
        p = spike_current_plotter(df[::200], palette, title = chamber_dict[ch], show_legend=False, width=400, height=200)
    elif ch == 2:
        p = spike_current_plotter(df[::200], palette, title = chamber_dict[ch], show_legend=False, width=400, height=200, spike_times=spike_times[1::2])
    elif ch == 3:
        p = spike_current_plotter(df[::200], palette, title = chamber_dict[ch], show_legend=False, width=400, height=200, spike_times=spike_times)
    
    p.xaxis.major_label_text_font_size = '12pt'
    p.yaxis.major_label_text_font_size = '12pt'
    p.xaxis.axis_label_text_font_size = '10pt'
    p.yaxis.axis_label_text_font_size = '10pt'
    p.title.text_font_size = '12pt'
    p.output_backend = 'svg'
    
    spiking_plots.append(p)

for p in spiking_plots[:-1]:
    p.y_range = spiking_plots[-1].y_range
    p.x_range = spiking_plots[-1].x_range
    
    
spiking_faceted = bokeh.layouts.gridplot(spiking_plots, ncols=2)    
bokeh.io.show(spiking_faceted)

In [40]:
bokeh.io.export_svg(spiking_faceted, filename='./plots/nitrate_spiking.svg')

['./plots/nitrate_spiking.svg']