In [1]:
import csv
import numpy as np
import pandas as pd
import scipy.optimize
import scipy.stats
import colorcet as cc

import statsmodels.api as sm

import bokeh.io
import bokeh.plotting
import bokeh.palettes
import bokeh.models
from bokeh.transform import jitter, factor_cmap

import tqdm

import random
random.seed(42)

bokeh.io.output_notebook()

import warnings
warnings.filterwarnings('ignore')

In [2]:
def general_hill(x, Ka, n, A, B, C):
    """
    Generalized Hill function used as the calibration curve.

    Evaluates ``B + A / (C + (Ka / x) ** n)``. In this notebook ``x`` is the
    calibration concentration and the result is the modelled fluorescence.
    Works on scalars or numpy arrays (element-wise).
    """

    # (Ka / x) ** n is the only term that depends on x
    ratio_term = (Ka / x) ** n

    return B + A / (C + ratio_term)

def inverse_general_hill(y, Ka, n, A, B, C):
    """
    Inverse of the generalized Hill function.

    Solves ``y = B + A / (C + (Ka / x) ** n)`` for ``x``, i.e. derives a
    concentration from a fluorescence value given the fitted parameters.
    """

    # Undo the offset and scaling, then the exponent, then the Ka ratio
    hill_term = A / (y - B) - C
    root = hill_term ** (1/n)

    return Ka / root

def get_calib_data(fluor_df):
    """
    Function to extract calibration data from the general dataframe(s).

    Selects the rows whose 'Strain' is 'calibration' and converts their
    'Condition Conc. (µM)' column to float.

    Returns a new DataFrame; ``fluor_df`` itself is not modified.
    """

    # Explicit copy: the column assignment below must not target a slice of
    # the caller's frame (that is what triggers SettingWithCopyWarning).
    calib_df = fluor_df.loc[fluor_df['Strain'] == 'calibration'].copy()
    calib_df['Condition Conc. (µM)'] = calib_df['Condition Conc. (µM)'].astype(float)

    return calib_df

def plot_calib_point(calib_df, title=None, alpha=0.05):
    """
    Plotter for the calibration data.

    Draws 'PCAred fluorescence (AU)' against 'Condition Conc. (µM)' as a
    scatter of semi-transparent circles and returns the Bokeh figure
    (the figure is not shown here).

    Params:
    -------
    calib_df : pandas DataFrame
        Must contain the two columns named above.

    title : str or None
        Figure title.

    alpha : float, default 0.05
        Opacity of the individual measurement markers.
    """

    fig = bokeh.plotting.figure(height=400, 
                                width=600, 
                                title=title, 
                                x_axis_label='µM PCAred', 
                                y_axis_label='Fluorescence (AU)')

    # `legend_label` replaces the deprecated `legend` glyph keyword
    fig.circle(calib_df['Condition Conc. (µM)'].values, 
               calib_df['PCAred fluorescence (AU)'].values, 
               size=5, 
               alpha=alpha, 
               legend_label='Calibration measurements')

    fig.legend.location = 'bottom_right'

    return fig
    
def fit_hill(calib_df, alpha=0.05, show_plot=False, p0=[150, 2, 40000, 5000, 1]):
    """
    Function to fit the generalized Hill function to the calibration data.

    Params:
    -------
    calib_df : pandas DataFrame
        Calibration rows with 'Condition Conc. (µM)' (x) and
        'PCAred fluorescence (AU)' (y).

    alpha : float, default 0.05
        Marker opacity used when the fit is plotted.

    show_plot : Bool, default False
        If True, show the calibration points with the fitted curve overlaid.

    p0 : list
        Initial guess for (Ka, n, A, B, C), passed to scipy's curve_fit.

    Returns:
    --------
    popt, pcov : the optimal parameters and their covariance, as returned
        by scipy.optimize.curve_fit.
    """

    xdata = calib_df['Condition Conc. (µM)'].values
    ydata = calib_df['PCAred fluorescence (AU)'].values

    popt, pcov = scipy.optimize.curve_fit(general_hill, xdata, ydata, p0=p0)

    # Only build the figure when it will actually be displayed. This function
    # is called once per time point during calibration, so constructing an
    # unused Bokeh figure on every call is pure overhead.
    if show_plot:
        plot = plot_calib_point(calib_df, title='Fit of calibration model', alpha=alpha)

        x = np.linspace(0, 300, 100)
        plot.line(x, general_hill(x, *popt), color='black')

        bokeh.io.show(plot)

    return popt, pcov

def convert_fluor_to_conc(fluor_exp_df, popt):
    """
    Convert fluorescence measurements to concentrations.

    Applies the inverse calibration curve (with fitted parameters ``popt``)
    to every value of 'PCAred fluorescence (AU)' and stores the result in a
    new 'measured PCAred (µM)' column. The column is added to the frame that
    was passed in, and that same frame is returned.
    """

    concentrations = []
    for fluorescence in fluor_exp_df['PCAred fluorescence (AU)']:
        concentrations.append(inverse_general_hill(fluorescence, *popt))

    fluor_exp_df['measured PCAred (µM)'] = concentrations

    return fluor_exp_df

def fitting_pipeline(df, p0=[150, 2, 40000, 5000, 1], show_plot=False):
    """
    Function to bring above utilities together.

    Splits ``df`` into calibration rows ('Strain' == 'calibration') and
    experimental rows, fits the calibration curve, and uses it to add a
    'measured PCAred (µM)' column to the experimental rows.

    Params:
    -------
    df : pandas DataFrame
        Calibration and experimental measurements to be calibrated together.

    p0 : list
        Initial guess for the Hill-function parameters.

    show_plot : Bool, default False
        Whether to display the calibration fit.

    Returns the experimental rows (a new DataFrame) with concentrations added.
    """

    calib_df = get_calib_data(df)

    # Explicit copy so the column added by convert_fluor_to_conc never
    # targets a slice of the caller's frame.
    exp_df = df.loc[df['Strain'] != 'calibration'].copy()

    # Only the fitted parameters are needed here; the covariance is discarded
    popt, _ = fit_hill(calib_df, p0=p0, show_plot=show_plot)

    exp_df = convert_fluor_to_conc(exp_df, popt)

    return exp_df

def linear_approximation(x_array, y_array, cutoff_time):
    """
    Linear regression of the data points whose x value lies strictly below
    ``cutoff_time``. Used for generating supplementary figs 5 and 6.

    Returns the slope, intercept and correlation coefficient (r) of the fit.
    """

    before_cutoff = x_array < cutoff_time

    fit = scipy.stats.linregress(x_array[before_cutoff], y_array[before_cutoff])

    return fit.slope, fit.intercept, fit.rvalue

def linear_range_evaluator(single_strain_single_condition_df, y_var, title=None):
    """
    Assesses the goodness of linear fit over the data for all possible cutoff times.
    Used for supplementary figs 5 and 6.

    Rows containing NaN are dropped, then 48 evenly spaced cutoff times are
    scanned between the first and last remaining time point. For each cutoff,
    the points before it are fitted with a straight line and R-squared is
    recorded.

    Params:
    -------
    single_strain_single_condition_df : pandas DataFrame
        Must contain 'Time [hr]' and the column named by ``y_var``.

    y_var : str
        Name of the column to regress against time.

    title : str or None
        Figure title.

    Returns a small Bokeh figure of R-squared against cutoff time, with a red
    vertical marker 5 hours after the first time point.
    """

    df = single_strain_single_condition_df.dropna()

    x_array = df['Time [hr]'].values
    y_array = df[y_var].values

    # With no usable rows, fall back to a nominal 0-24 h scan range
    if len(x_array) > 0:
        start = min(x_array)
        stop = max(x_array)
    else:
        start = 0
        stop = 24

    cutoff_times = np.linspace(start, stop, 48)

    r_squareds = []
    slopes = []
    ints = []

    for ct in cutoff_times:

        try:
            s, i, r = linear_approximation(x_array, y_array, ct)
        except ValueError:
            # linregress rejects windows it cannot fit (e.g. no points before
            # the first cutoff); record a gap for that cutoff instead.
            r_squareds.append(None)
            slopes.append(None)
            ints.append(None)
        else:
            r_squareds.append(r**2)
            slopes.append(s)
            ints.append(i)

    source = bokeh.models.ColumnDataSource(data=dict(cutoff_times=cutoff_times,
                                                     r_squareds=r_squareds,
                                                     slope_int=[f'slope: {s}\nintercept:{i}' for s, i in zip(slopes, ints)])
                                          )

    fig = bokeh.plotting.figure(width=200,
                                height=200,
                                x_axis_label='Cutoff time (hours)',
                                y_axis_label='R-squared',
                                title=title)

    fig.circle(x='cutoff_times', y='r_squareds', source=source)

    # Vertical marker 5 hours after the first data point
    fig.line([start + 5, start + 5], [0, 1], color='red')

    fig.y_range = bokeh.models.Range1d(0, 1.05)
    fig.output_backend = 'svg'

    return fig

def linear_assessment_plotter(df, y_var='measured PCAred (µM)'):
    """
    Build one R**2-versus-cutoff-time plot per (Strain, Condition) group so
    that a linear range for estimating initial rates can be chosen.
    Used for supplementary figs 5 and 6.

    Returns the list of figures, in groupby order.
    """

    plots = []

    for (strain, condition), group_df in df.groupby(['Strain', 'Condition']):

        # For the 'PCA, NO2' condition only data after 1.5 h is assessed
        if condition == 'PCA, NO2':
            group_df = group_df.loc[group_df['Time [hr]'] > 1.5]

        plots.append(
            linear_range_evaluator(group_df, y_var, f'{strain} with {condition} linearity')
        )

    return plots

def get_initial_redox_rates(single_strain_single_condition_df, y='measured PCAred (µM)', verbose=True):
    """
    Estimate the initial redox rate by linear regression over the first
    5 hours of data.

    Rows containing NaN are dropped first. The regression window ends 5 hours
    after the first remaining time point; for the 'PCA, NO2' condition the
    window additionally starts after 1.5 h.

    Params:
    -------
    single_strain_single_condition_df : pandas DataFrame
        Data for one strain under one condition. Must contain 'Time [hr]',
        'Strain', 'Condition' and the column named by ``y``.

    y : str, default 'measured PCAred (µM)'
        Column regressed against time.

    verbose : Bool, default True
        Print a short summary of the result.

    Returns:
    --------
    slope, intercept, stderr of the regression, or (0, 0, 0) when no data
    points fall inside the regression window.

    Raises:
    -------
    ValueError if the frame contains more than one distinct condition.
    """

    mdf = single_strain_single_condition_df

    condition = mdf['Condition'].unique()
    strain = mdf['Strain'].unique()

    # The window choice below depends on the condition, so it must be unique.
    if len(condition) > 1:
        raise ValueError(
            f'expected data for a single condition, got {list(condition)}'
        )

    mdf = mdf.dropna()

    times = mdf['Time [hr]'].values
    first_data_time = min(times) if len(times) > 0 else 0
    window_end = 5 + first_data_time

    # Compare the single condition value itself rather than relying on the
    # truth value of an array comparison.
    is_no2 = len(condition) == 1 and condition[0] == 'PCA, NO2'

    if is_no2:
        lin_reg_df = mdf.loc[(mdf['Time [hr]'] > 1.5) & (mdf['Time [hr]'] <= window_end)]
    else:
        lin_reg_df = mdf.loc[mdf['Time [hr]'] <= window_end]

    if len(lin_reg_df) == 0:
        if verbose:
            print(f"""
            {strain}
            ------
            {condition}: no detectable redox
            """)

        return 0, 0, 0

    slope, inter, r, p, e = scipy.stats.linregress(lin_reg_df['Time [hr]'].values,
                                                   lin_reg_df[y].values)

    if verbose:
        # e*1.96 turns the slope's standard error into a ~95% interval half-width
        print(f"""
            {strain}
            ------
            {condition}: init redox rate {slope:.2f} +/- {e*1.96:.2f} µM/hr
            """)

    return slope, inter, e
        

def plotter(df, 
            plot_grouping, 
            color_grouping, 
            y='measured PCAred (µM)', 
            y_axis_label='Reduced PCA (µM)', 
            x_axis_label='Time (hrs)',
            palette=bokeh.palettes.Colorblind6):
    
    """
    Function to plot the data for paper figures.
    
    plot_grouping and color_grouping are either "Condition" or "Strain"

    One figure is produced per ``plot_grouping`` value; within a figure each
    ``color_grouping`` value gets its own colour and legend entry. When a
    group has more than one well, the individual wells are drawn as faint
    points and their mean as a line. When plotting by strain with the default
    ``y``, the initial-rate regression line is overlaid as a dashed line.

    The first palette colour is always replaced by grey. If there are more
    colour groups than palette entries, the colours are reused cyclically.

    Returns the list of figures; all figures share the x and y ranges of the
    first one.
    """
    
    # Private copy so the caller's palette is never modified
    colors = list(palette)
    colors[0] = 'grey'
    
    plots = []
    
    for plot_key, mini_df in df.groupby(plot_grouping):
        
        fig = bokeh.plotting.figure(
                width=600,
                height=300,
                title=f"{plot_grouping}: {plot_key}",
                y_axis_label=y_axis_label,
                x_axis_label=x_axis_label
            )
        
        legend_items = []
        
        for i, (label, mdf) in enumerate(mini_df.groupby(color_grouping)):
            
            color = colors[i % len(colors)]
            
            # Aggregated data has no 'Well' column; treat it as unreplicated
            wells = mdf['Well'].unique() if 'Well' in mdf.columns else []
            
            if len(wells) > 1: # Need to account for replicates
                
                measurement_arrays = []
                cs = []
                for w in wells:
                    well_df = mdf.loc[mdf['Well'] == w]
                    time = well_df['Time [hr]'].values
                    measurement = well_df[y].values
                    
                    measurement_arrays.append(measurement)
                    
                    cs.append(fig.circle(time, measurement, color=color, alpha=0.05, size=3))
                    
                # Element-wise mean across wells; this requires every well to
                # have the same number of points. The mean is drawn against
                # the time axis of the last well.
                mean = sum(measurement_arrays) / len(measurement_arrays)
                
                l = fig.line(time, mean, color=color, alpha=0.7, line_width=3)
                
                legend_items.append((label, [l, *cs]))
                
            else:
                time = mdf['Time [hr]'].values
                measurement = mdf[y].values
                
                c = fig.circle(time, measurement, color=color, alpha=0.7)
                
                legend_items.append((label, [c]))
                
            if plot_grouping == 'Strain' and y == 'measured PCAred (µM)':
                
                slope, inter, e = get_initial_redox_rates(mdf)
                
                # A slope of exactly 0 is what get_initial_redox_rates returns
                # when there was nothing to fit
                if slope != 0:
                    
                    lin_x = np.linspace(0,24,5)
                    lin_y = slope * lin_x + inter
                    fig.line(lin_x, lin_y, color=color, line_dash='dashed', line_width=2, alpha=0.8)
        
        if y == 'measured PCAred (µM)':
            fig.y_range = bokeh.models.Range1d(-5, 205)
            
        elif y == 'OD600' or y == 'Mean OD600':
            fig.y_range = bokeh.models.Range1d(0, 0.3)
        
        legend = bokeh.models.Legend(items=legend_items)
        legend.click_policy = "hide"
        
        fig.add_layout(legend, 'right')
        
        fig.legend.label_text_font_style = "italic"
        fig.legend.label_text_font_size = '12pt'
        fig.title.text_font_size = "14pt"
        
        fig.yaxis.axis_label_text_font_size = '12pt'
        fig.xaxis.axis_label_text_font_size = '12pt'
        
        fig.yaxis.major_label_text_font_size = '10pt'
        fig.xaxis.major_label_text_font_size = '10pt'
        
        fig.output_backend = 'svg'
        
        plots.append(fig)
        
    # Link every figure's ranges to the first figure's, once all exist
    for p in plots[1:]:
        p.x_range = plots[0].x_range
        p.y_range = plots[0].y_range
            
    return plots
    
def calibrate_by_time_point(df, p0=[150, 2, 40000, 5000, 1], show_plot=False):
    """
    Calibrate the data separately for each time point.

    The frame is grouped by 'Time [hr]'; every group is run through
    fitting_pipeline (its own calibration fit and conversion) and the
    per-time-point results are concatenated into one DataFrame.
    """
    calibrated_dfs = [
        fitting_pipeline(time_point_df, p0=p0, show_plot=show_plot)
        for _, time_point_df in df.groupby('Time [hr]')
    ]

    return pd.concat(calibrated_dfs)

### Load the data

In [3]:
data = pd.read_csv('./data/tidy_fluorescence_data_all_experiments.csv', index_col=0)

In [4]:
data.head()

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,pregrowth condition
0,0.072,A1,38343.0,calibration,Shaking,PCA,250,2021_10_19,shaking
1,0.156,A1,38470.0,calibration,Shaking,PCA,250,2021_10_19,shaking
2,0.239,A1,38676.0,calibration,Shaking,PCA,250,2021_10_19,shaking
3,0.322,A1,38689.0,calibration,Shaking,PCA,250,2021_10_19,shaking
4,0.406,A1,38777.0,calibration,Shaking,PCA,250,2021_10_19,shaking


### The different experiments were performed with either an old or new plate reader, and the values for fluorescence were different between them. Calibrate by date. First, check that calibration parameters are working okay for the overall curves. Then, calibrate by time point.

In [5]:
date_grouping = data.groupby('date')

# For each experiment date, fit the calibration curve to all of that date's
# calibration wells and show the fit, to check the initial parameter guess.
for date, df in date_grouping:
    calib_df = get_calib_data(df)

    print(date)
    popt, pcov = fit_hill(calib_df, alpha=0.1, show_plot=True, p0=[150, 2, 40000, 5000, 1])

2021_10_19


2021_12_03


2021_12_27


2021_12_28


2022_01_07


2022_05_14


2022_05_18


2022_06_06


2022_06_28


2022_07_13


2022_09_23


2022_09_27


2022_09_29


2022_10_05


2022_10_06


2022_10_07


2022_10_11


2022_11_22


### Looks like those initial parameters are good for the curve fits. Now calibrate each experiment by time point

In [50]:
date_grouping = data.groupby('date')

# One calibrated DataFrame per experiment date, in groupby (date) order
calibrated_dfs = []

for date, df in tqdm.tqdm(date_grouping):
    calibrated_df = calibrate_by_time_point(df, p0=[150, 2, 40000, 5000, 1])
    calibrated_dfs.append(calibrated_df)
    
    

100%|██████████| 18/18 [02:08<00:00,  7.12s/it]


In [51]:
calibrated_dfs[8]

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,pregrowth condition,measured PCAred (µM)
6924,0.068,B1,47598.0,WT1,Standing,PCA,200,2022_06_28,standing,198.214980
7501,0.068,B2,47148.0,WT2,Standing,PCA,200,2022_06_28,standing,194.281223
8078,0.068,B3,47437.0,WT3,Standing,PCA,200,2022_06_28,standing,196.799113
8655,0.068,B4,47498.0,WT1,Standing,"PCA, NO3","200, 10000",2022_06_28,standing,197.334443
9232,0.068,B5,47923.0,WT2,Standing,"PCA, NO3","200, 10000",2022_06_28,standing,201.102290
...,...,...,...,...,...,...,...,...,...,...
53065,46.568,H8,6163.0,blank,Standing,PCA,0,2022_06_28,standing,
53642,46.568,H9,6928.0,blank,Standing,PCA,0,2022_06_28,standing,
54219,46.568,H10,7632.0,blank,Standing,PCA,0,2022_06_28,standing,5.279595
54796,46.568,H11,7810.0,blank,Standing,PCA,0,2022_06_28,standing,6.464272


### Aggregate the technical replicates (first approach)

In [52]:
# Average the numeric measurements of rows sharing the same time point, strain
# and treatment. 'Well' is deliberately not a grouping key, so wells holding
# the same strain are averaged together. (Biological replicates carry a number
# after the strain ID and therefore stay separate.)
# numeric_only=True keeps the string column 'Well' out of the mean; newer
# pandas raises a TypeError otherwise instead of silently dropping it.
tech_agg = (
    calibrated_dfs[8]
    .groupby(
        [
            'Time [hr]', 
            'Strain', 
            'Medium', 
            'Condition', 
            'Condition Conc. (µM)', 
            'pregrowth condition', 
            'date', 
        ]
    )
    .mean(numeric_only=True)
    .reset_index()
)

In [53]:
handled_replicate_types = []

for cdf in calibrated_dfs:
    
    # Biological replicates have a number after the strain ID
    # Technical replicates do not
    # Thus, can just group by strain and forget the wells
    
    # Aggregate over the strain name to catch the technical replicates.
    # numeric_only=True keeps the string column 'Well' out of the mean; newer
    # pandas raises a TypeError otherwise instead of silently dropping it.
    aggregated = (
        cdf
        .groupby(
            [
                'Time [hr]', 
                'Strain', 
                'Medium', 
                'Condition', 
                'Condition Conc. (µM)', 
                'pregrowth condition', 
                'date', 
            ]
        )
        .mean(numeric_only=True)
        .reset_index()
    )
        
    handled_replicate_types.append(aggregated)
        

In [54]:
calibrated_dfs[8].head()

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,pregrowth condition,measured PCAred (µM)
6924,0.068,B1,47598.0,WT1,Standing,PCA,200,2022_06_28,standing,198.21498
7501,0.068,B2,47148.0,WT2,Standing,PCA,200,2022_06_28,standing,194.281223
8078,0.068,B3,47437.0,WT3,Standing,PCA,200,2022_06_28,standing,196.799113
8655,0.068,B4,47498.0,WT1,Standing,"PCA, NO3","200, 10000",2022_06_28,standing,197.334443
9232,0.068,B5,47923.0,WT2,Standing,"PCA, NO3","200, 10000",2022_06_28,standing,201.10229


In [55]:
handled_replicate_types[8]

Unnamed: 0,Time [hr],Strain,Medium,Condition,Condition Conc. (µM),pregrowth condition,date,PCAred fluorescence (AU),measured PCAred (µM)
0,0.068,Abiotic,Standing,PCA,200,standing,2022_06_28,48426.000000,205.652449
1,0.068,Abiotic,Standing,"PCA, DMSO","200, 10000",standing,2022_06_28,48806.000000,209.178228
2,0.068,Abiotic,Standing,"PCA, NO3","200, 10000",standing,2022_06_28,48073.333333,202.477974
3,0.068,Abiotic,Standing,"PCA, TMAO","200, 10000",standing,2022_06_28,28157.333333,75.484572
4,0.068,WT1,Standing,PCA,200,standing,2022_06_28,47598.000000,198.214980
...,...,...,...,...,...,...,...,...,...
36330,46.568,torA-tlKO2,Standing,"PCA, TMAO","200, 10000",standing,2022_06_28,8517.000000,10.724195
36331,46.568,torA-tlKO3,Standing,PCA,200,standing,2022_06_28,47900.000000,215.330154
36332,46.568,torA-tlKO3,Standing,"PCA, DMSO","200, 10000",standing,2022_06_28,12552.000000,30.688618
36333,46.568,torA-tlKO3,Standing,"PCA, NO3","200, 10000",standing,2022_06_28,3182.000000,


### Concatenate all experiments into a single DataFrame

In [56]:
calibrated_data = pd.concat(handled_replicate_types)

In [57]:
calibrated_data

Unnamed: 0,Time [hr],Strain,Medium,Condition,Condition Conc. (µM),pregrowth condition,date,PCAred fluorescence (AU),measured PCAred (µM)
0,0.072,Abiotic,Shaking,PCA,200,shaking,2021_10_19,35779.666667,204.000219
1,0.072,Abiotic,Shaking,"PCA, NO3","200, 10000",shaking,2021_10_19,35346.333333,197.470078
2,0.072,WT,Shaking,PCA,200,shaking,2021_10_19,33997.333333,178.806133
3,0.072,WT,Shaking,"PCA, NO3","200, 10000",shaking,2021_10_19,33820.666667,176.745257
4,0.072,napA-tlKO,Shaking,PCA,200,shaking,2021_10_19,35629.000000,201.689624
...,...,...,...,...,...,...,...,...,...
25383,48.071,napAnarZnarG pFE21-NapA2,Standing,"PCA, NO3","200, 10000",standing,2022_11_22,41650.000000,169.780328
25384,48.071,napAnarZnarG pFE21-NapA3,Standing,"PCA, NO3","200, 10000",standing,2022_11_22,42015.000000,172.610572
25385,48.071,napAnarZnarG pFE21-NarZ1,Standing,"PCA, NO3","200, 10000",standing,2022_11_22,25497.000000,76.793877
25386,48.071,napAnarZnarG pFE21-NarZ2,Standing,"PCA, NO3","200, 10000",standing,2022_11_22,29731.000000,96.493120


In [58]:
calibrated_data.loc[calibrated_data['date'] == '2021_12_28']['Medium'].unique()

array(['PCA oxidizer basal medium', 'Shaking + POBM', 'Standing + POBM',
       'Standing + POBM + NO3'], dtype=object)

In [59]:
calibrated_data['Condition'].unique()

array(['PCA', 'PCA, NO3', 'PCA, DMSO', 'PCA, O2', 'PCA, TMAO', 'PCA, Fum',
       'PCAox', 'PCAox, Fum'], dtype=object)

In [60]:
calibrated_data['Medium'].unique()

array(['Shaking', 'PCA oxidizer basal medium', 'Shaking + POBM',
       'Standing + POBM', 'Standing + POBM + NO3', 'blank',
       'Shaking + 4-HB', 'Standing', 'Standing + NO3', 'basal medium'],
      dtype=object)

#### Try a sample plot of the standing-culture data for all non-blank strains (PCA, PCA + Fum and PCA + NO3 conditions)

In [61]:
# Keep standing-culture data for the PCA, PCA + Fum and PCA + NO3 conditions,
# leaving out the blanks and one problematic strain.
sanity_check = calibrated_data.loc[
    ~calibrated_data['Strain'].isin([
        'Blank',
        'blank',
        'frdAB-tlKO3',  # This one had wonky plate reader data and gives an outlier for the PCA oxidation rate
    ])
    & calibrated_data['Condition'].isin(['PCA', 'PCA, Fum', 'PCA, NO3'])
    & calibrated_data['Medium'].isin(['Standing', 'Standing + POBM'])
]

sanity_check.head()

Unnamed: 0,Time [hr],Strain,Medium,Condition,Condition Conc. (µM),pregrowth condition,date,PCAred fluorescence (AU),measured PCAred (µM)
2,0.071,WT,Standing + POBM,"PCA, NO3","200, 10000",comparison,2021_12_27,38644.333333,194.085488
5,0.071,napA-tlKO,Standing + POBM,"PCA, NO3","200, 10000",comparison,2021_12_27,38454.0,191.367789
8,0.071,napAnarZ-tlKO,Standing + POBM,"PCA, NO3","200, 10000",comparison,2021_12_27,38132.666667,187.00293
11,0.071,napAnarZnarG-tlKO,Standing + POBM,"PCA, NO3","200, 10000",comparison,2021_12_27,36963.333333,171.874924
14,0.071,narG-tlKO,Standing + POBM,"PCA, NO3","200, 10000",comparison,2021_12_27,38433.333333,191.144584


In [62]:
sanity_check = sanity_check.sort_values(by=['Strain', 'Condition', 'Time [hr]'])

In [63]:
sanity_check.iloc[['discard' not in s.lower() for s in sanity_check['Strain'].values]]

Unnamed: 0,Time [hr],Strain,Medium,Condition,Condition Conc. (µM),pregrowth condition,date,PCAred fluorescence (AU),measured PCAred (µM)
0,0.068,Abiotic,Standing,PCA,200,standing,2022_06_06,49400.000000,200.945810
0,0.068,Abiotic,Standing,PCA,200,standing,2022_06_28,48426.000000,205.652449
0,0.068,Abiotic,Standing,PCA,200,standing,2022_07_13,45936.000000,200.405101
0,0.068,Abiotic,Standing,PCA,200,standing,2022_10_07,46650.166667,218.353442
76,0.151,Abiotic,Standing,PCA,200,standing,2022_06_06,49693.000000,201.377004
...,...,...,...,...,...,...,...,...,...
7978,23.734,ubiC-tlKO4,Standing,"PCA, NO3","200, 10000",standing,2022_10_06,4011.666667,1.084002
8006,23.817,ubiC-tlKO4,Standing,"PCA, NO3","200, 10000",standing,2022_10_06,4027.333333,1.134257
8034,23.901,ubiC-tlKO4,Standing,"PCA, NO3","200, 10000",standing,2022_10_06,4028.000000,1.221904
8062,23.984,ubiC-tlKO4,Standing,"PCA, NO3","200, 10000",standing,2022_10_06,4017.666667,1.069084


In [64]:
sanity_check = sanity_check.iloc[['discard' not in s.lower() for s in sanity_check['Strain'].values]]

In [65]:
sanity_check['date'].unique()

array(['2022_06_06', '2022_06_28', '2022_07_13', '2022_10_07',
       '2022_01_07', '2022_10_11', '2022_11_22', '2021_12_28',
       '2022_10_06', '2022_09_27', '2022_09_29', '2022_10_05',
       '2021_12_27'], dtype=object)

In [66]:
sanity_plot = bokeh.plotting.figure(width=600, height=400)

sanity_plot.circle(x=sanity_check['Time [hr]'].values, y=sanity_check['measured PCAred (µM)'].values)

bokeh.io.show(sanity_plot)

### Try to do a LOWESS fit and get a derivative

In [67]:
# Pick a single strain/condition trace to develop the LOWESS approach on.
# Extend the lists below to look at other strains or conditions.
single_data = calibrated_data.loc[
    calibrated_data['Strain'].isin(['frdAB-tlKO3'])
    & calibrated_data['Condition'].isin(['PCA, Fum'])
    & calibrated_data['Medium'].isin(['Standing', 'Standing + POBM'])
]

In [68]:
single_plot = bokeh.plotting.figure(width=600, height=400)

single_plot.circle(x=single_data['Time [hr]'].values, y=single_data['measured PCAred (µM)'].values)

bokeh.io.show(single_plot)

In [69]:
smoothed = sm.nonparametric.lowess(
    exog=single_data['Time [hr]'].values, 
    endog=single_data['measured PCAred (µM)'].values,
    is_sorted=False,
    return_sorted=True,
    frac=24/len(single_data['Time [hr]'].values) # this means the LOWESS is calculated over a window of 2 hours (24 * 5 min is 2 hours)
)

In [70]:
single_plot.line(x=smoothed[:,0], y=smoothed[:,1], color='red', line_width=2)
bokeh.io.show(single_plot)

In [71]:
derivative = np.gradient(smoothed[:,1], 5/60) # 5/60 is the time (in hours) between data points for these data

In [72]:
deriv_plot = bokeh.plotting.figure(width=600, height=400)

deriv_plot.line(x=smoothed[:,0], y=derivative)

bokeh.io.show(deriv_plot)

In [73]:
np.max(derivative * -1)

1189.0714812141546

In [74]:
def lowess_fit(single_data_df, timestep=5/60, window=24, plot=True):
    """
    Function to perform a LOWESS fit of PCA oxidation data.
    
    Params:
    -------
    single_data_df : pandas DataFrame
        The data for a single strain under a single condition from a single experiment.
        This is either a biological replicate from a single well or the mean of
        technical replicates from a given day.
        
    timestep : float, default 5/60
        The interval (in hours) between measurements in the timeseries.
        Not used by the fit itself; accepted so that all the LOWESS helpers
        share the same signature.
        
    window : int, default 24
        The number of time points over which the LOWESS algorithm should smooth.
        If the timestep is 5/60, a window of 24 corresponds to 2 hours
        
    plot : Bool, default True
        Should this function plot the data and fit?

    Returns:
    --------
    The x (time) and y (smoothed concentration) values of the fit, sorted by x.
    """
    
    x = single_data_df['Time [hr]'].values
    y = single_data_df['measured PCAred (µM)'].values
    
    # `frac` is the fraction of the data used for each local regression, so
    # it cannot meaningfully exceed 1; cap it for series shorter than `window`.
    frac = min(1.0, window/len(x))
    
    smoothed = sm.nonparametric.lowess(
        exog=x, 
        endog=y,
        is_sorted=False,
        return_sorted=True,
        frac=frac
    )
    
    if plot:
        lowess_fit_plot = bokeh.plotting.figure(width=600, height=400)
        
        lowess_fit_plot.circle(x=x, y=y)
        lowess_fit_plot.line(smoothed[:,0], smoothed[:,1], line_width=2, color='red')
        
        bokeh.io.show(lowess_fit_plot)
    
    return smoothed[:,0], smoothed[:,1]
    

In [75]:
lx, ly = lowess_fit(single_data)

In [76]:
def get_lowess_derivative(lowess_x, lowess_y, timestep=5/60, plot=True):
    """
    Function to calculate the derivative of a LOWESS fit of PCA oxidation data.
    
    Params:
    -------
    lowess_x : numpy array
        The x-axis (time points) of the lowess fit
        
    lowess_y : numpy array
        The y-axis (PCAred concentration) of the lowess fit
        
    timestep : float or None, default 5/60
        The interval (in hours) between measurements in the timeseries.
        The points are assumed to be evenly spaced by this amount. Pass None
        to differentiate against the actual values in ``lowess_x`` instead,
        which is appropriate when the spacing is not uniform.
        
    plot : Bool, default True
        Should this function plot the derivative?

    Returns:
    --------
    numpy array with the derivative of ``lowess_y`` with respect to time,
    one value per input point.
    """
    
    # np.gradient accepts either a scalar spacing or the coordinate array
    spacing = lowess_x if timestep is None else timestep
    derivative = np.gradient(lowess_y, spacing)
    
    if plot:
        lowess_derivative_plot = bokeh.plotting.figure(width=600, height=400)
        
        lowess_derivative_plot.line(lowess_x, derivative)
        
        bokeh.io.show(lowess_derivative_plot)
    
    return derivative

In [77]:
der = get_lowess_derivative(lx, ly)

In [78]:
def get_max_oxidation_rate_from_lowess_fit(single_data_df, timestep=5/60, window=24, plot=True):
    """
    Function to get the maximum PCA oxidation rate from a LOWESS fit.

    The data are smoothed with lowess_fit, the smoothed curve is
    differentiated with get_lowess_derivative, and the largest value of the
    negated derivative (i.e. the steepest decrease) is returned.
    
    Params:
    -------
    single_data_df : pandas DataFrame
        The data for a single strain under a single condition from a single experiment.
        
    timestep : float, default 5/60
        The interval (in hours) between measurements in the timeseries
        
    window : int, default 24
        The number of time points over which the LOWESS algorithm should smooth.
        
    plot : Bool, default True
        Should the fit and the derivative be plotted?
    """
    
    fit_x, fit_y = lowess_fit(single_data_df, timestep=timestep, window=window, plot=plot)
    rate_of_change = get_lowess_derivative(fit_x, fit_y, timestep=timestep, plot=plot)
    
    # Oxidation lowers the measured concentration, so flip the sign first
    return np.max(-rate_of_change)

In [79]:
get_max_oxidation_rate_from_lowess_fit(single_data)

1189.0714812141546

### Try to generate some plots!

In [80]:
def linear_fit_scan(single_condition_df, y_var='measured PCAred (µM)', window = 2, init=1):
    """
    Scan a sliding window over the time series and return the steepest
    linear-regression slope found.

    A window of length ``window`` (hours) is started at every time point t
    with ``init <= t < last time point - window``; the points with
    ``t <= time < t + window`` are fitted with a straight line. The slope
    with the largest absolute value is kept.

    Returns that slope and 1.96 times its standard error. Both are 0 if no
    window produced a non-zero slope.
    """
    
    time = single_condition_df['Time [hr]'].values
    ys = single_condition_df[y_var].values
    
    steepest_slope = 0
    steepest_stderr = 0
    
    for window_start in time:
        
        # Only start windows after `init` that still fit inside the series
        if not ((window_start >= init) and (window_start < time[-1] - window)):
            continue
        
        in_window = (time >= window_start) & (time < window_start + window)
        fit = scipy.stats.linregress(time[in_window], ys[in_window])
        
        if np.abs(fit.slope) > np.abs(steepest_slope):
            steepest_slope = fit.slope
            steepest_stderr = fit.stderr
    
    return steepest_slope, steepest_stderr*1.96

In [81]:
def get_lowess_rate_df(calibrated_df, grouping=['date', 'Strain', 'Condition', 'Medium']):
    """
    Build a table of maximum oxidation rates (from LOWESS fits), one row per group.

    Params:
    -------
    calibrated_df : pandas DataFrame
        Calibrated data containing the ``grouping`` columns plus the columns
        needed by get_max_oxidation_rate_from_lowess_fit.

    grouping : list of str
        Columns to group by. The first three entries must be the date, the
        strain and the condition, in that order, because the group keys are
        read by position.

    Returns:
    --------
    pandas DataFrame with columns 'max rate', 'date', 'cat' (a
    (strain, condition) tuple), 'fill color' and 'line color'.

    Strains whose name ends in a digit are treated as biological replicates:
    the digit is stripped so that replicates share a category, and they get a
    black outline. All other strains get a red outline. Conditions without an
    assigned colour are filled grey.
    """
    palette = bokeh.palettes.Colorblind4

    cmap = {
        'PCA': 'white',
        'PCA, DMSO': palette[1],
        'PCA, NO3': palette[0],
        'PCA, Fum': palette[2],
        'PCA, TMAO': palette[3]
    }
    
    groups = []
    dates = []
    max_redox_rates = []
    fill_colors = []
    line_colors = []
    
    for key, group_df in tqdm.tqdm(calibrated_df.groupby(grouping)):
        date, strain, condition = key[0], key[1], key[2]
        
        dates.append(date)
        # Fall back to grey instead of failing on a condition with no colour
        fill_colors.append(cmap.get(condition, 'grey'))
        
        if strain[-1] in '0123456789':
            # biological replicate
            line_colors.append('black')
            # strip the number so that the replicates plot together
            strain = strain[:-1]
        else:
            # technical replicate
            line_colors.append('red')
        
        groups.append((strain, condition))
        max_redox_rates.append(get_max_oxidation_rate_from_lowess_fit(group_df, plot=False))
        
    plot_df = pd.DataFrame({'max rate': np.array(max_redox_rates), 
                            'date': dates,
                            'cat': groups,
                            'fill color': fill_colors,
                            'line color': line_colors
                           })
    
    return plot_df

In [82]:
def get_plot_df(calibrated_df, grouping=['date', 'Strain', 'Condition', 'Medium']):
    """
    Tabulate the maximum rate and its error from `linear_fit_scan` per group.

    The dataframe is grouped by `grouping`; `linear_fit_scan` is called on each
    group's rows and is expected to return a `(rate, error)` pair. The rate is
    negated before it is stored, so a falling concentration is reported as a
    positive rate.

    Parameters
    ----------
    calibrated_df : pandas.DataFrame
        Data containing the `grouping` columns plus whatever `linear_fit_scan`
        reads.
    grouping : list of str
        Columns to group by. Must contain 'date', 'Strain' and 'Condition';
        they may appear at any position in the list.

    Returns
    -------
    pandas.DataFrame
        One row per group with the columns 'max rate' (negated fit result),
        'err', 'err_bars' (`[rate - err, rate + err]` around the negated rate),
        'cat' (a `(strain, condition)` tuple), 'date', 'fill color' and
        'line color'.

    Raises
    ------
    ValueError
        If `grouping` lacks one of the required columns.
    KeyError
        If a group's condition has no entry in the colour map below.
    """
    palette = bokeh.palettes.Colorblind4

    cmap = {
        'PCA': 'white',
        'PCA, DMSO': palette[1],
        'PCA, NO3': palette[0],
        'PCA, Fum': palette[2],
        'PCA, TMAO': palette[3]
    }

    # Look up where each required field sits in the group key rather than
    # assuming the default column order, so a custom `grouping` cannot
    # silently mislabel dates, strains and conditions.
    group_cols = list(grouping)
    missing = [col for col in ('date', 'Strain', 'Condition') if col not in group_cols]
    if missing:
        raise ValueError(f'grouping must include the column(s) {missing}')
    date_pos = group_cols.index('date')
    strain_pos = group_cols.index('Strain')
    condition_pos = group_cols.index('Condition')

    digits = '0123456789'

    groups = []
    max_redox_rates = []
    max_redox_rate_errors = []
    fill_colors = []
    line_colors = []
    dates = []

    for key, group_df in tqdm.tqdm(calibrated_df.groupby(grouping)):
        dates.append(key[date_pos])
        condition = key[condition_pos]
        fill_colors.append(cmap[condition])

        strain = key[strain_pos]
        if strain[-1] in digits:
            # biological replicate
            line_colors.append('black')
            # strip the number so that the replicates plot together
            # (only a single trailing digit is removed)
            strain = strain[:-1]
        else:
            # technical replicate
            line_colors.append('red')

        groups.append((strain, condition))

        max_rate, max_err = linear_fit_scan(group_df)
        max_redox_rates.append(max_rate)
        max_redox_rate_errors.append(max_err)

    # Error bars are centred on the sign-flipped rate, i.e. on the value that
    # ends up in the 'max rate' column.
    error_bars = [[r*-1 - e, r*-1 + e]
                  for r, e in zip(max_redox_rates, max_redox_rate_errors)]

    plot_df = pd.DataFrame({'max rate': np.array(max_redox_rates)*-1,
                            'err': max_redox_rate_errors,
                            'err_bars': error_bars,
                            'cat': groups,
                            'date': dates,
                            'fill color': fill_colors,
                            'line color': line_colors
                            })

    return plot_df

In [83]:
lowess_plot_df = get_lowess_rate_df(sanity_check)

100%|██████████| 275/275 [00:26<00:00, 10.23it/s]


In [84]:
lowess_plot_df

Unnamed: 0,max rate,date,cat,fill color,line color
0,30.031454,2021_12_27,"(WT, PCA, NO3)",#0072B2,red
1,23.997372,2021_12_27,"(napA-tlKO, PCA, NO3)",#0072B2,red
2,16.194231,2021_12_27,"(napAnarZ-tlKO, PCA, NO3)",#0072B2,red
3,3.983294,2021_12_27,"(napAnarZnarG-tlKO, PCA, NO3)",#0072B2,red
4,17.748390,2021_12_27,"(narG-tlKO, PCA, NO3)",#0072B2,red
...,...,...,...,...,...
270,1.273550,2022_11_22,"(napAnarZnarG pFE21-NapA, PCA, NO3)",#0072B2,black
271,4.238274,2022_11_22,"(napAnarZnarG pFE21-NapA, PCA, NO3)",#0072B2,black
272,4.431691,2022_11_22,"(napAnarZnarG pFE21-NarZ, PCA, NO3)",#0072B2,black
273,4.095708,2022_11_22,"(napAnarZnarG pFE21-NarZ, PCA, NO3)",#0072B2,black


In [85]:
sanity_plot_df = get_plot_df(sanity_check)
sanity_plot_df

100%|██████████| 275/275 [00:47<00:00,  5.81it/s]


Unnamed: 0,max rate,err,err_bars,cat,date,fill color,line color
0,29.651285,0.340763,"[29.310521397027912, 29.99204767562898]","(WT, PCA, NO3)",2021_12_27,#0072B2,red
1,23.871843,0.449227,"[23.422616012764784, 24.32107090492606]","(napA-tlKO, PCA, NO3)",2021_12_27,#0072B2,red
2,16.188445,0.351624,"[15.836821305508689, 16.54006840840356]","(napAnarZ-tlKO, PCA, NO3)",2021_12_27,#0072B2,red
3,-7.317642,1.471158,"[-8.788800032624966, -5.846483232980877]","(napAnarZnarG-tlKO, PCA, NO3)",2021_12_27,#0072B2,red
4,17.940886,0.300178,"[17.640707444911307, 18.241064263096497]","(narG-tlKO, PCA, NO3)",2021_12_27,#0072B2,red
...,...,...,...,...,...,...,...
270,0.482533,0.392950,"[0.08958302428861076, 0.8754833839187695]","(napAnarZnarG pFE21-NapA, PCA, NO3)",2022_11_22,#0072B2,black
271,0.639046,0.327035,"[0.31201047465576687, 0.966081354704635]","(napAnarZnarG pFE21-NapA, PCA, NO3)",2022_11_22,#0072B2,black
272,3.153715,0.220300,"[2.9334141488152063, 3.374014993268424]","(napAnarZnarG pFE21-NarZ, PCA, NO3)",2022_11_22,#0072B2,black
273,2.630498,0.253586,"[2.3769121040605374, 2.884084314800814]","(napAnarZnarG pFE21-NarZ, PCA, NO3)",2022_11_22,#0072B2,black


In [86]:
sanity_cds = bokeh.models.ColumnDataSource(sanity_plot_df)

In [87]:
# Per-group maximum rates from `sanity_plot_df`, drawn as circles with a
# vertical error bar each, on a categorical x axis built from the 'cat' column.
factors = bokeh.models.FactorRange(factors=sanity_plot_df['cat'].unique())

# Hovering over a point shows the value of its 'date' column.
TOOLTIPS = [("date", "@date")]

circle_error_plot = bokeh.plotting.figure(
    x_range=factors,
    height=600,
    width=4000,
    tooltips=TOOLTIPS,
    y_axis_label='max. PCA oxidation rate (µM/hr)',
)

# Marker positions and colours are taken from columns of `sanity_cds`.
circle = circle_error_plot.circle(
    x='cat',
    y='max rate',
    source=sanity_cds,
    fill_color='fill color',
    line_color='line color',
    line_width=2,
    size=10,
    alpha=0.7,
)

# One two-point line per row: both x values are the row's category and the
# y values are that row's 'err_bars' pair.
circle_error_plot.multi_line(
    xs=[[g, g] for g in sanity_plot_df['cat'].values],
    ys=sanity_plot_df['err_bars'].values,
    color='black',
    line_width=1.5,
    alpha=0.5,
)

circle_error_plot.output_backend = 'svg'
circle_error_plot.xgrid.grid_line_color = None
circle_error_plot.xaxis.major_label_orientation = np.pi/4

bokeh.io.show(circle_error_plot)

In [88]:
# bokeh.io.export_svg(circle_error_plot, filename='./plots/MBL2022_genetics.svg')

In [89]:
# Per-group maximum rates from the LOWESS table, drawn as horizontally
# jittered circles on a categorical x axis built from the 'cat' column.
lowess_cds = bokeh.models.ColumnDataSource(lowess_plot_df)
factors = bokeh.models.FactorRange(factors=lowess_plot_df['cat'].unique())

# Hovering over a point shows the value of its 'date' column.
TOOLTIPS = [("date", "@date")]

lowess_rate_plot = bokeh.plotting.figure(
    x_range=factors,
    height=600,
    width=4000,
    tooltips=TOOLTIPS,
    y_axis_label='max PCA oxidation rate (µM/hr)',
)

# Jitter is applied within each category so that points sharing a category
# do not sit exactly on top of each other.
circle = lowess_rate_plot.circle(
    x=jitter('cat', range=lowess_rate_plot.x_range, width=0.6),
    y='max rate',
    source=lowess_cds,
    fill_color='fill color',
    line_color='line color',
    line_width=2,
    size=10,
    alpha=0.7,
)

lowess_rate_plot.output_backend = 'svg'
lowess_rate_plot.xgrid.grid_line_color = None
lowess_rate_plot.xaxis.major_label_orientation = np.pi/4

bokeh.io.show(lowess_rate_plot)

### Check out all the strains

In [80]:
# Pool every strain: keep rows up to and including 20 hr whose medium,
# pregrowth condition and condition are in the lists below.
# NOTE(review): the condition labels here start with 'PCAred', while the colour
# map inside get_plot_df is keyed on 'PCA', 'PCA, NO3', ... -- confirm the
# labels agree before re-running the cells that consume `all_strains`.
all_strains = calibrated_data.loc[
    (calibrated_data['Time [hr]'] <= 20)
    & calibrated_data['Medium'].isin(['basal medium', 'LB standing pregrow', 'LB shaking pregrow'])
    & calibrated_data['pregrowth condition'].isin(['standing', 'standing*', 'shaking', 'comparison'])
    & calibrated_data['Condition'].isin(['PCAred', 'PCAred, Fum', 'PCAred, NO3', 'PCAred, DMSO'])
]

all_strains.tail()

Unnamed: 0,Time [hr],Strain,Medium,Condition,Condition Conc. (µM),pregrowth condition,date,replicate type,PCAred fluorescence (AU),measured PCAred (µM),Well
52746,19.984,Abiotic,basal medium,PCAred,200,standing,2022_07_13,biological,46542.0,194.559819,H8
53323,19.984,Abiotic,basal medium,PCAred,200,standing,2022_07_13,biological,46167.0,191.327774,H9
53900,19.984,Abiotic,basal medium,"PCAred, NO3","200, 10000",standing,2022_07_13,biological,45841.0,188.567163,H10
54477,19.984,Abiotic,basal medium,"PCAred, NO3","200, 10000",standing,2022_07_13,biological,44754.0,179.675825,H11
55054,19.984,Abiotic,basal medium,"PCAred, NO3","200, 10000",standing,2022_07_13,biological,46384.0,193.190565,H12


In [81]:
all_strains_plot_df = get_plot_df(all_strains)
all_strains_plot_df

technical
max redox rate -1.78 +/- 0.70 µM/hr
technical
max redox rate -1.43 +/- 0.66 µM/hr
technical
max redox rate -1.22 +/- 0.50 µM/hr
technical
max redox rate -41.21 +/- 0.59 µM/hr
technical
max redox rate -1.55 +/- 0.69 µM/hr
technical
max redox rate -43.63 +/- 0.99 µM/hr
technical
max redox rate -1.42 +/- 0.78 µM/hr
technical
max redox rate -32.95 +/- 0.64 µM/hr
technical
max redox rate 1.27 +/- 0.57 µM/hr
technical
max redox rate -25.40 +/- 0.46 µM/hr
technical
max redox rate -0.82 +/- 0.73 µM/hr
technical
max redox rate -39.67 +/- 1.08 µM/hr
technical
max redox rate -1.41 +/- 0.81 µM/hr
technical
max redox rate -33.71 +/- 0.65 µM/hr
technical
max redox rate -1.58 +/- 1.02 µM/hr
technical
max redox rate -19.23 +/- 0.35 µM/hr
technical
max redox rate -1.14 +/- 0.83 µM/hr
technical
max redox rate -24.73 +/- 0.41 µM/hr
technical
max redox rate -1.57 +/- 1.22 µM/hr
technical
max redox rate -2.51 +/- 0.88 µM/hr
technical
max redox rate 2.02 +/- 1.47 µM/hr
technical
max redox rate -35

Unnamed: 0,max rate,err,err_bars,cat,fill color,line color
0,1.783507,0.700194,"[1.0833134643394737, 2.4837005564978707]","(Abiotic, PCAred)",white,red
1,1.427092,0.658575,"[0.7685173980030182, 2.0856673638048613]","(Abiotic, PCAred, NO3)",#0072B2,red
2,1.217313,0.502559,"[0.7147541655114816, 1.7198720998138501]","(MBL, PCAred)",white,red
3,41.214800,0.592226,"[40.62257352725364, 41.807026231743116]","(MBL, PCAred, NO3)",#0072B2,red
4,1.547365,0.690401,"[0.8569648650876008, 2.2377661111679727]","(MBL napA-tlKO, PCAred)",white,red
...,...,...,...,...,...,...
296,-0.903193,0.332712,"[-1.235904917157033, -0.570480561805796]","(MBL narZ-tlKO, PCAred)",white,black
297,1.375994,0.528162,"[0.8478327566007628, 1.9041558533354532]","(MBL narZ-tlKO, PCAred)",white,black
298,38.586814,0.434593,"[38.1522215477733, 39.02140722023437]","(MBL narZ-tlKO, PCAred, NO3)",#0072B2,black
299,39.738700,0.450177,"[39.28852386860097, 40.18887705277313]","(MBL narZ-tlKO, PCAred, NO3)",#0072B2,black


In [82]:
all_strains_cds = bokeh.models.ColumnDataSource(all_strains_plot_df)

In [83]:
# Same circle-plus-error-bar figure as for the sanity data, now fed from
# `all_strains_plot_df` / `all_strains_cds`. Rebinds `factors` and
# `circle_error_plot`.
factors = bokeh.models.FactorRange(factors=all_strains_plot_df['cat'].unique())

circle_error_plot = bokeh.plotting.figure(
    x_range=factors,
    height=600,
    width=2000,
    y_axis_label='PCA oxidation rate (µM/hr)',
)

# Marker positions and colours are taken from columns of `all_strains_cds`.
circle = circle_error_plot.circle(
    x='cat',
    y='max rate',
    source=all_strains_cds,
    fill_color='fill color',
    line_color='line color',
    line_width=2,
    size=10,
    alpha=0.7,
)

# One two-point line per row: both x values are the row's category and the
# y values are that row's 'err_bars' pair.
circle_error_plot.multi_line(
    xs=[[g, g] for g in all_strains_plot_df['cat'].values],
    ys=all_strains_plot_df['err_bars'].values,
    color='black',
    line_width=1.5,
    alpha=0.5,
)

circle_error_plot.output_backend = 'svg'
circle_error_plot.xgrid.grid_line_color = None
circle_error_plot.xaxis.major_label_orientation = np.pi/4

bokeh.io.show(circle_error_plot)

In [10]:
problem = data.loc[(data['date'] == '2022_01_07') & (data['Time [hr]'] == 0.154)]

In [11]:
problem

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type
0,0.154,A1,40677.0,calibration,basal medium,PCA,250,2022_01_07,biological
387,0.154,A2,37785.0,calibration,basal medium,PCA,200,2022_01_07,biological
774,0.154,A3,35490.0,calibration,basal medium,PCA,175,2022_01_07,biological
1161,0.154,A4,33343.0,calibration,basal medium,PCA,150,2022_01_07,biological
1548,0.154,A5,30618.0,calibration,basal medium,PCA,125,2022_01_07,biological
...,...,...,...,...,...,...,...,...,...
35217,0.154,H8,3534.0,Abiotic,PCA oxidizer basal medium,"PCAox, Fum2-","200, 10000",2022_01_07,biological
35604,0.154,H9,3655.0,Abiotic,PCA oxidizer basal medium,"PCAox, Fum2-","200, 10000",2022_01_07,biological
35991,0.154,H10,35653.0,Abiotic,PCA oxidizer basal medium,"PCAred, Fum2-","200, 10000",2022_01_07,biological
36378,0.154,H11,35989.0,Abiotic,PCA oxidizer basal medium,"PCAred, Fum2-","200, 10000",2022_01_07,biological


In [12]:
np.max(problem['PCAred fluorescence (AU)'].values)

40677.0

In [13]:
type(problem['PCAred fluorescence (AU)'].values[2])

numpy.float64

In [15]:
problem_exp = problem.loc[problem['Strain'] != 'calibration']

In [16]:
# Quick look at the raw fluorescence of the non-calibration wells in
# `problem_exp`, plotted against their 'Time [hr]' values.
fig = bokeh.plotting.figure(width=600, height=400)
circle = fig.circle(x=problem_exp['Time [hr]'], y=problem_exp['PCAred fluorescence (AU)'])
bokeh.io.show(fig)

In [17]:
problem_exp.loc[problem_exp['PCAred fluorescence (AU)'] <= 10000]

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type
4644,0.154,B1,5291.0,MBL WT,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
5031,0.154,B2,5388.0,MBL WT,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
5418,0.154,B3,5262.0,MBL WT,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
5805,0.154,B4,4866.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
6192,0.154,B5,4686.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
6579,0.154,B6,4633.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
6966,0.154,B7,4496.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
7353,0.154,B8,4553.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
7740,0.154,B9,4745.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological
8127,0.154,B10,4899.0,MBL frdAB-tlKO,PCA oxidizer basal medium,PCA,0,2022_01_07,biological


In [36]:
# `problem` already contains only the 0.154 hr rows, so filtering it for
# 0.321 hr always gives an empty frame; take the second time point from
# `data` for the same date instead.
problem2 = data.loc[(data['date'] == '2022_01_07') & (data['Time [hr]'] == 0.321)]

In [39]:
df = data.loc[data['date'] == '2022_07_13']

In [41]:
t0 = df.loc[df['Time [hr]'] == 0.068]

In [51]:
t0c = t0.loc[t0['Strain'] == 'calibration']

In [52]:
t0c

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type
0,0.068,A1,50628.0,calibration,basal medium,PCA,250,2022_07_13,biological
577,0.068,A2,45950.0,calibration,basal medium,PCA,200,2022_07_13,biological
1154,0.068,A3,42927.0,calibration,basal medium,PCA,175,2022_07_13,biological
1731,0.068,A4,39621.0,calibration,basal medium,PCA,150,2022_07_13,biological
2308,0.068,A5,35807.0,calibration,basal medium,PCA,125,2022_07_13,biological
2885,0.068,A6,31642.0,calibration,basal medium,PCA,100,2022_07_13,biological
3462,0.068,A7,26118.0,calibration,basal medium,PCA,75,2022_07_13,biological
4039,0.068,A8,20338.0,calibration,basal medium,PCA,50,2022_07_13,biological
4616,0.068,A9,13393.0,calibration,basal medium,PCA,25,2022_07_13,biological
5193,0.068,A10,9815.0,calibration,basal medium,PCA,10,2022_07_13,biological


In [27]:
# NOTE(review): as recorded in this cell's output, this call ended in a
# RuntimeError -- curve_fit inside fit_hill reached maxfev while starting from
# p0 = [150, 2, 9000, 1000, 1]. A cell that raises stops a full run of the notebook.
fitting_pipeline(problem, show_plot=True)

RuntimeError: Optimal parameters not found: Number of calls to function has reached maxfev = 1200.

> [0;32m/Users/lmt/miniconda3/envs/cportucalensis/lib/python3.9/site-packages/scipy/optimize/minpack.py[0m(789)[0;36mcurve_fit[0;34m()[0m
[0;32m    787 [0;31m        [0mcost[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0msum[0m[0;34m([0m[0minfodict[0m[0;34m[[0m[0;34m'fvec'[0m[0;34m][0m [0;34m**[0m [0;36m2[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    788 [0;31m        [0;32mif[0m [0mier[0m [0;32mnot[0m [0;32min[0m [0;34m[[0m[0;36m1[0m[0;34m,[0m [0;36m2[0m[0;34m,[0m [0;36m3[0m[0;34m,[0m [0;36m4[0m[0;34m][0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m--> 789 [0;31m            [0;32mraise[0m [0mRuntimeError[0m[0;34m([0m[0;34m"Optimal parameters not found: "[0m [0;34m+[0m [0merrmsg[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    790 [0;31m    [0;32melse[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    791 [0;31m        [0;31m# Rename maxfev (leastsq) to max_nfev (least_squares), if specified.[0m

ipdb>  u


> [0;32m<ipython-input-2-14ddb1ccd15c>[0m(59)[0;36mfit_hill[0;34m()[0m
[0;32m     57 [0;31m    [0mydata[0m [0;34m=[0m [0mcalib_df[0m[0;34m[[0m[0;34m'PCAred fluorescence (AU)'[0m[0;34m][0m[0;34m.[0m[0mvalues[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     58 [0;31m[0;34m[0m[0m
[0m[0;32m---> 59 [0;31m    [0mpopt[0m[0;34m,[0m [0mpcov[0m [0;34m=[0m [0mscipy[0m[0;34m.[0m[0moptimize[0m[0;34m.[0m[0mcurve_fit[0m[0;34m([0m[0mgeneral_hill[0m[0;34m,[0m [0mxdata[0m[0;34m,[0m [0mydata[0m[0;34m,[0m [0mp0[0m[0;34m=[0m[0mp0[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     60 [0;31m[0;34m[0m[0m
[0m[0;32m     61 [0;31m    [0mplot[0m [0;34m=[0m [0mplot_calib_point[0m[0;34m([0m[0mcalib_df[0m[0;34m,[0m [0mtitle[0m[0;34m=[0m[0;34m'Fit of calibration model'[0m[0;34m,[0m [0malpha[0m[0;34m=[0m[0malpha[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m


ipdb>  p0


[150, 2, 9000, 1000, 1]


ipdb>  quit


In [18]:
problem_c = get_calib_data(problem)

In [20]:
popt, pcov = fit_hill(problem_c, alpha=1, show_plot=True, p0=[150, 2, 40000, 5000, 1])

In [21]:
problem_exp_calibrated = convert_fluor_to_conc(problem_exp, popt)

In [23]:
problem_exp_calibrated.dropna()

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type,measured PCAred (µM)
13932,0.154,D1,36329.0,MBL WT,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,182.364863
14319,0.154,D2,35988.0,MBL WT,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,178.104575
14706,0.154,D3,35582.0,MBL WT,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,173.198439
15093,0.154,D4,35050.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,167.026545
15480,0.154,D5,35569.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,173.044221
15867,0.154,D6,36000.0,MBL frdA-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,178.252288
16254,0.154,D7,36302.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,182.022721
16641,0.154,D8,35978.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,177.981601
17028,0.154,D9,35987.0,MBL frdB-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,178.092272
17415,0.154,D10,35939.0,MBL frdAB-tlKO,PCA oxidizer basal medium,PCAred,200,2022_01_07,biological,177.503053


In [48]:
c = calibrate_by_time_point(t0, show_plot=True)

In [44]:
calibrated_dfs

[       Unnamed: 0  Time [hr] Well  PCAred fluorescence (AU)  \
 3468         3468      0.072   B1                    6066.0   
 3757         3757      0.072   B2                   35741.0   
 4046         4046      0.072   B3                   34719.0   
 4335         4335      0.072   B4                   34697.0   
 4624         4624      0.072   B5                   35035.0   
 ...           ...        ...  ...                       ...   
 23119       23119     24.072   G8                    3912.0   
 23408       23408     24.072   G9                    3440.0   
 23697       23697     24.072  G10                    3726.0   
 23986       23986     24.072  G11                    5490.0   
 24275       24275     24.072  G12                    5981.0   
 
                                     Strain                     Medium  \
 3468                                   NaN                        NaN   
 3757                               Abiotic  PCA oxidizer basal medium   
 4046   

In [45]:
calibrated_dfs[-1]

Unnamed: 0.1,Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type,measured PCAred (µM)
88392,0,0.071,A1,8703.0,MBL WT,LB shaking pregrow,"PCAred, O2","200, trace",2021_12_28,technical,14338.024729
88681,289,0.071,A2,8094.0,MBL WT,LB shaking pregrow,"PCAred, O2","200, trace",2021_12_28,technical,14225.255599
88970,578,0.071,A3,8564.0,MBL WT,LB shaking pregrow,"PCAred, O2","200, trace",2021_12_28,technical,14310.945796
89259,867,0.071,A4,9070.0,MBL WT,LB standing pregrow,"PCAred, O2","200, trace",2021_12_28,technical,14413.789538
89548,1156,0.071,A5,9182.0,MBL WT,LB standing pregrow,"PCAred, O2","200, trace",2021_12_28,technical,14438.240340
...,...,...,...,...,...,...,...,...,...,...,...
114112,25720,24.071,H5,32132.0,PA14 ∆narG,LB standing pregrow,"PCAred, NO3-","200, 10000",2021_12_28,technical,
114401,26009,24.071,H6,32557.0,PA14 ∆narG,LB standing pregrow,"PCAred, NO3-","200, 10000",2021_12_28,technical,
114690,26298,24.071,H7,28622.0,PA14 ∆narG,LB + NO3- standing pregrow,"PCAred, NO3-","200, 10000",2021_12_28,technical,
114979,26587,24.071,H8,29686.0,PA14 ∆narG,LB + NO3- standing pregrow,"PCAred, NO3-","200, 10000",2021_12_28,technical,
