In [13]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from pint import UnitRegistry
from scipy.stats.mstats import hmean

from analysis.util import dedup_trace_legends

In [14]:
units = UnitRegistry()

# noinspection PyTypeChecker
df_od = (
    pd.read_excel(
        '/Users/pasha/onedrive/nu/Katsev Pavel/04 Raw Data/EXP_0010_PK_20241018_NC_confirm_with_wash.xlsx',
        sheet_name='Day 3 - 5 OD tracking',
        usecols='A:J',
        skiprows=3,
    )
    .assign(arg_conc=lambda df: df.condition.str.split('•').str.get(0).apply(units.parse_expression))
    .assign(arg_conc_uM=lambda df: df.arg_conc.apply(lambda qty: int(qty.to('micromolar').magnitude)))
    # .assign(lim=lambda df: np.abs((df.od_sample - df.od_sample.mean()) / df.od_sample.std(ddof=0)) < 3)
)
df_od

Unnamed: 0,timestamp,elapsed_time_hr,sample_id,strain_id,condition,blank_src,conc_factor,od_blank_diluted,od_sample_diluted,od_sample,arg_conc,arg_conc_uM
0,2024-10-22 18:00:00,0.0,1.1,KSF001 wt,0µM•Arg,0µM•Arg SubMix,4,0.039,0.114,0.3,0 micromolar,0
1,2024-10-22 18:00:00,0.0,2.1,KSF230 ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.117,0.312,0 micromolar,0
2,2024-10-22 18:00:00,0.0,3.1,KSF107 + ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.11,0.284,0 micromolar,0
3,2024-10-22 18:00:00,0.0,4.1,KSF111 + ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.108,0.276,0 micromolar,0
4,2024-10-22 18:00:00,0.0,1.2,KSF001 wt,0µM•Arg,0µM•Arg SubMix,4,0.039,0.112,0.292,0 micromolar,0
5,2024-10-22 18:00:00,0.0,2.2,KSF230 ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.114,0.3,0 micromolar,0
6,2024-10-22 18:00:00,0.0,3.2,KSF107 + ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.107,0.272,0 micromolar,0
7,2024-10-22 18:00:00,0.0,4.2,KSF111 + ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.107,0.272,0 micromolar,0
8,2024-10-22 18:00:00,0.0,1.3,KSF001 wt,0µM•Arg,0µM•Arg SubMix,4,0.039,0.113,0.296,0 micromolar,0
9,2024-10-22 18:00:00,0.0,2.3,KSF230 ∆argB,0µM•Arg,0µM•Arg SubMix,4,0.039,0.114,0.3,0 micromolar,0


In [15]:
df_od_mean_sem = df_od.groupby(['elapsed_time_hr', 'strain_id', 'arg_conc_uM'], sort=False).agg(
    od_sample_mean=pd.NamedAgg('od_sample', 'mean'),
    od_sample_sem=pd.NamedAgg('od_sample', 'sem'),
).round(3).reset_index()
df_od_mean_sem

Unnamed: 0,elapsed_time_hr,strain_id,arg_conc_uM,od_sample_mean,od_sample_sem
0,0.0,KSF001 wt,0,0.296,0.002
1,0.0,KSF230 ∆argB,0,0.304,0.004
2,0.0,KSF107 + ∆argB,0,0.279,0.004
3,0.0,KSF111 + ∆argB,0,0.272,0.002
4,17.0,KSF001 wt,0,1.083,0.021
5,17.0,KSF230 ∆argB,0,0.26,0.002
6,17.0,KSF107 + ∆argB,0,0.359,0.005
7,17.0,KSF111 + ∆argB,0,0.407,0.006
8,24.5,KSF001 wt,0,1.075,0.021
9,24.5,KSF230 ∆argB,0,0.26,0.004


In [25]:
fig_arg_response = (
    px
    .bar(
        df_od_mean_sem,
        x='elapsed_time_hr',
        y='od_sample_mean',
        error_y='od_sample_sem',
        color='strain_id',
        facet_col='arg_conc_uM',
        barmode='group',
        labels={
            'arg_conc_uM': '[Arg] µM',
            'od_sample_mean': 'OD600 (mean)',
            'strain_id': 'ADP1 strain',
            'elapsed_time_hr': '',
        },
    )
    .add_traces(
        px.line(
            df_od_mean_sem,
            x='elapsed_time_hr',
            y='od_sample_mean',
            color='strain_id',
            facet_col='arg_conc_uM',
            markers=False,
        )['data']
    )
    .update_layout(
        title='OD ADP1 [KSF001 (wt), KSF230(∆argB), KSF107 (∆cphaAI ∆astA ∆argB), KSF111 (∆cphAI ∆astA ∆argBR)] | growth in 0µm•Arg after LB preculture and 0µm•Arg wash',
        title_x=0.5,
        showlegend=True,
        boxmode='group',
        boxgap=.8,  # Gap between boxes in the same group (0.5 makes them narrower)
        boxgroupgap=0,  # Gap between different groups of boxes,
        # height=500,
        # width=2000,
        xaxis_title='Elapsed Time [hr]',
        # xaxis_title_x=.5,
    )
    .for_each_trace(dedup_trace_legends())
)

# fig_arg_response.write_image('../../figures/pk_exp0010_arg_response_raw.pdf')
fig_arg_response

In [17]:
df_od_mean_max = df_od_mean_sem.sort_values(
    by=['arg_conc_uM', 'strain_id', 'od_sample_mean'],
).drop_duplicates(
    subset=['arg_conc_uM', 'strain_id'],
    keep='last',
)

_bg_107, _bg_111 = df_od_mean_max.loc[lambda df: df.arg_conc_uM == 0].od_sample_mean.iloc[:2]
df_od_mean_max = df_od_mean_max.assign(
    od_sample_mean_sans_bg=lambda df: df.apply(
        lambda r: r.od_sample_mean - (_bg_107 if '107' in r.strain_id else _bg_111),
        axis='columns'
    ),
)

df_od_mean_max

Unnamed: 0,elapsed_time_hr,strain_id,arg_conc_uM,od_sample_mean,od_sample_sem,od_sample_mean_sans_bg
12,40.0,KSF001 wt,0,1.153,0.007,0.757
14,40.0,KSF107 + ∆argB,0,0.396,0.0,-0.757
15,40.0,KSF111 + ∆argB,0,0.46,0.004,0.064
1,0.0,KSF230 ∆argB,0,0.304,0.004,-0.092


In [18]:
def od_by_arg_conc(
        arg_conc,
        od600_to_cdw=.33 * units.g / units.L,
        perc_cgp_of_cdw=.44,
        perc_arg_of_cgp=.3,
        mw_arg=174.2 * units.g / units.mol
) -> float:
    od = arg_conc / (od600_to_cdw * perc_cgp_of_cdw * perc_arg_of_cgp / mw_arg)
    return od.to_base_units()


df_cgp_arg = pd.DataFrame(data=dict(arg_conc_µM=[0, 5000])).assign(
    od=lambda df: df.arg_conc_µM.apply(lambda c: od_by_arg_conc(c * units.uM).magnitude)
)
# df_cgp_arg

In [19]:
def max_od_by_arg_conc(
        arg_conc,
        od600_to_cdw=.33 * units.g / units.L,
        perc_arg_of_cdw=.025,  # (1prot/2cdw)(1arg/20prot) = 1/40
        mw_arg=174.2 * units.g / units.mol
) -> float:
    od = arg_conc / (od600_to_cdw * perc_arg_of_cdw / mw_arg)
    return od.to_base_units()


df_arg_cdw = pd.DataFrame(data=dict(arg_conc_µM=[0, 250, 1000])).assign(
    od=lambda df: df.arg_conc_µM.apply(lambda c: max_od_by_arg_conc(c * units.uM).magnitude)
)
# df_arg_cdw

In [20]:
df_max_ods_sans_0uM = (
    df_od_mean_max
    # .replace({'arg_conc_uM'})
    # .loc[lambda df: (0 < df.arg_conc_uM)]
    # .loc[lambda df: (0 <= df.arg_conc_µM) & (df.arg_conc_µM < 500)]
    # .loc[lambda df: (0 <= df.arg_conc_µM) & (df.arg_conc_µM < 4000)]
    # .loc[lambda df: (0 <= df.arg_conc_µM) & (df.arg_conc_µM < 4000)]
)
df_max_ods_sans_0uM

Unnamed: 0,elapsed_time_hr,strain_id,arg_conc_uM,od_sample_mean,od_sample_sem,od_sample_mean_sans_bg
12,40.0,KSF001 wt,0,1.153,0.007,0.757
14,40.0,KSF107 + ∆argB,0,0.396,0.0,-0.757
15,40.0,KSF111 + ∆argB,0,0.46,0.004,0.064
1,0.0,KSF230 ∆argB,0,0.304,0.004,-0.092


In [23]:
px.bar(
    df_max_ods_sans_0uM,
    x='arg_conc_uM',
    y='od_sample_mean',
    # y='od_sample_mean_sans_bg',
    error_y='od_sample_sem',
    color='strain_id',
    barmode='group',
    labels={
        'arg_conc_uM': '[Arg] µM',
        'od_sample_mean': 'OD600_mean',
        'od_sample_mean_sans_bg': 'OD600_mean (background removed)',
        'strain_id': 'ADP1 strain',
        'elapsed_time_hr': 'Elapsed Time (hr)',
    },
).update_layout(
    # title='OD ADP1 [KSF107 (∆astA ∆argB), KSF111 (∆astA ∆argBR)] | growth in dosed Arg after preculture and wash',
    # title_x=0.5,
    showlegend=True,
    height=500,
)

In [11]:
# df_max_ods_sans_0uM_means.assign(
#     arg_g=lambda df: df.index.get_level_values('arg_conc_μM') / 1E6 * 174.2,
#     bm_g=lambda df: df.od_sample_mean * .33,
# )

In [12]:
fig_yield = px.scatter(
    df_od_mean_max,
    x='arg_conc_uM',
    y='od_sample_mean_sans_bg',
    color='strain_id',
    labels={
        'strain_id': 'ADP1 Strain',
        'od_sample_x': 'OD',
        'arg_conc_μM': '[Arg] (µM)'
    },
    trendline='ols',
).update_layout(
    # title='ADP1 {KSF230 ∆argB, KSF? ∆argB ∆argR} Biomass Yield Curves',
    title_x=0.5,
)
fig_yield.add_traces([
    # constraint: 
    go.Scatter(
        x=df_cgp_arg.arg_conc_μM, y=df_cgp_arg.od,
        name='[Arg] from BM w/CGP',
        mode='lines', line=dict(dash='dot', color='darkgray'),
        # fill='tonexty',
    ),
    # constraint: 
    go.Scatter(
        x=df_arg_cdw.arg_conc_μM, y=df_arg_cdw.od,
        name='ODmax by 2.5% Arg',
        mode='lines', line=dict(dash='dash'),
        # fill='tonexty',
    ),
])
yield_fits = px.get_trendline_results(fig_yield).set_index('ADP1 Strain').px_fit_results

fig_yield.update_layout(template='simple_white')

fig_yield.write_image('../../figures/pk_exp007_AginineTitrationResponse.pdf')
fig_yield

KeyError: "None of ['ADP1 Strain'] are in the columns"

In [12]:
pio.templates

Templates configuration
-----------------------
    Default template: 'plotly'
    Available templates:
        ['ggplot2', 'seaborn', 'simple_white', 'plotly',
         'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
         'ygridoff', 'gridon', 'none']

In [13]:
yield_fits['KSF230'].summary()
yield_fits['∆argR'].summary()



`kurtosistest` p-value may be inaccurate with fewer than 20 observations; only n=10 observations were given.


`kurtosistest` p-value may be inaccurate with fewer than 20 observations; only n=10 observations were given.



0,1,2,3
Dep. Variable:,y,R-squared:,0.769
Model:,OLS,Adj. R-squared:,0.74
Method:,Least Squares,F-statistic:,26.57
Date:,"Sun, 13 Oct 2024",Prob (F-statistic):,0.000869
Time:,09:45:42,Log-Likelihood:,-0.94321
No. Observations:,10,AIC:,5.886
Df Residuals:,8,BIC:,6.492
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,0.8103,0.130,6.220,0.000,0.510,1.111
x1,0.0001,2.56e-05,5.155,0.001,7.3e-05,0.000

0,1,2,3
Omnibus:,4.766,Durbin-Watson:,0.642
Prob(Omnibus):,0.092,Jarque-Bera (JB):,1.194
Skew:,0.15,Prob(JB):,0.55
Kurtosis:,1.334,Cond. No.,7050.0


In [14]:
def theor_arg_from_OD600_with_CGP(od600: float, frac_cgp_of_cdw: float = 0.25, frac_arg_of_cgp: float = .6,
                                  mw_arg: float = 174.2) -> float:
    return od600 * .33 * frac_cgp_of_cdw * frac_arg_of_cgp / mw_arg * 1E6


def theor_OD600_with_CGP_for_x_arg(x_arg: float, frac_cgp_of_cdw: float = 0.25, frac_arg_of_cgp: float = .6,
                                   mw_arg: float = 174.2) -> float:
    return x_arg / (.33 * frac_cgp_of_cdw * frac_arg_of_cgp / mw_arg * 1E6)


def yield_in_g_g(x):
    return x * .33 / (1.742 * 10 ** -4)


yield_in_g_g(0.0004)

theor_arg_from_OD600_with_CGP(1)
theor_OD600_with_CGP_for_x_arg(100)
theor_OD600_with_CGP_for_x_arg(250)
# t = yield_fits['KSF230'].summary().tables[1]
# t[('x1')]

0.8797979797979798

In [15]:
.506 / theor_OD600_with_CGP_for_x_arg(100, frac_cgp_of_cdw=.5)

2.8756601607347876

In [16]:
import pandera as pa
import pandera.typing as pat
from analysis.schemas import SakaguchiODSchema


@pa.check_types()
def sakaguchi_analysis(df: pat.DataFrame[SakaguchiODSchema]):
    return df

In [17]:
# noinspection PyTypeChecker
df_sakaguchi = pd.read_excel(
    '~/tyo_lab_pk/04 Raw Data/EXP_0007_PK_20240909_ArgConcentrationResponse.xlsx',
    sheet_name='Sakaguchi for residual arg',
    usecols='B:E',
    skiprows=45,
)

sakaguchi_analysis(df_sakaguchi)

SchemaError: error in check_types decorator of function 'sakaguchi_analysis': column 'conc_factor' not in dataframe. Columns in dataframe: ['sample', 'vol_sample_ul', 'vol_reagentA_ul', 'vol_reagentB_ul']

In [13]:
plotly_colors = {
    "SP_Brights_Green": "rgb(81, 255, 0)",
    "SP_Brights_Light_Blue": "rgb(132, 255, 199)",
    "SP_Brights_Blue": "rgb(81, 168, 255)",
    "SP_Brights_Yellow": "rgb(229, 255, 28)",
    "SP_Brights_Orange": "rgb(255, 196, 13)",
    "SP_Brights_Orange_Red": "rgb(252, 45, 51)",
    "SP_Dark_Green": "rgb(27, 168, 36)",
    "SP_Dark_Light_Blue": "rgb(35, 150, 197)",
    "SP_Dark_Blue": "rgb(58, 38, 147)",
    "SP_Dark_Yellow": "rgb(209, 219, 0)",
    "SP_Dark_Orange": "rgb(201, 126, 19)",
    "SP_Dark_Orange_Red": "rgb(229, 57, 0)"
}

In [28]:
import plotly.graph_objects as go
import numpy as np

# Create figure
fig = go.Figure()

black = 'rgba(255,255,255,0)'

# create linear space
x_vals = np.linspace(0, 10, 100)

# doubling references
y_vals_1_1 = x_vals
y_vals_1_2 = 2 * x_vals
y_vals_1_4 = 4 * x_vals

fig.add_trace(
    go.Scatter(x=x_vals, y=y_vals_1_1, mode='lines', line=dict(dash='dash', color='black', width=1), name='1:1'))
fig.add_trace(
    go.Scatter(x=x_vals, y=y_vals_1_2, mode='lines', line=dict(dash='dash', color='black', width=1), name='1:2'))
fig.add_trace(
    go.Scatter(x=x_vals, y=y_vals_1_4, mode='lines', line=dict(dash='dash', color='black', width=1), name='1:4'))

# x -> 1:1   real bad like
fig.add_trace(go.Scatter(x=np.concatenate([x_vals, x_vals[::-1]]),
                         y=np.concatenate([y_vals_1_1, np.zeros_like(x_vals)]),
                         fill='toself',
                         fillcolor=plotly_colors['SP_Brights_Orange_Red'],
                         opacity=0.2,
                         line=dict(color=black),
                         showlegend=True))
# 1:1 -> 1:2  meh
fig.add_trace(go.Scatter(x=np.concatenate([x_vals, x_vals[::-1]]),
                         y=np.concatenate([y_vals_1_1, y_vals_1_2[::-1]]),
                         fill='toself',
                         fillcolor=plotly_colors['SP_Dark_Orange_Red'],
                         opacity=0.2,
                         line=dict(color=black),
                         showlegend=False))
# 1:2 -> 1:4  ok
fig.add_trace(go.Scatter(x=np.concatenate([x_vals, x_vals[::-1]]),
                         y=np.concatenate([y_vals_1_2, y_vals_1_4[::-1]]),
                         fill='toself',
                         fillcolor=plotly_colors['SP_Dark_Green'],
                         opacity=0.2,
                         line=dict(color=black),
                         showlegend=False))
# y -> 1:4  cookin
fig.add_trace(go.Scatter(x=np.concatenate([x_vals, np.zeros_like(x_vals)]),
                         y=np.concatenate([y_vals_1_4, y_vals_1_4[::-1]]),
                         fill='toself',
                         fillcolor=plotly_colors['SP_Brights_Green'],
                         line=dict(color=black),
                         opacity=0.2,
                         showlegend=False))

fig.add_annotation(x=.9, y=.8, text="death", showarrow=False, font=dict(size=20))
fig.add_annotation(x=.8, y=.95, text="no doubling", showarrow=False, font=dict(size=20))
fig.add_annotation(x=.35, y=.95, text="1-2 doublings", showarrow=False, font=dict(size=20))
fig.add_annotation(x=.11, y=.95, text="≥2 doublings", showarrow=False, font=dict(size=20))

# theoretical and experimental curves

fig.add_trace(go.Scatter(x=x_vals, y=1.43 * x_vals, mode='lines', line=dict(color='black'), name='∆argBR Best'))
fig.add_trace(go.Scatter(x=x_vals, y=4.6 * x_vals, mode='lines', line=dict(color='purple'), name='theoretical'))

fig.add_annotation(x=.2, y=.51, text="Y theoretical = 4.5", showarrow=False, font=dict(size=16))
fig.add_annotation(x=.42, y=.4, text="Y ∆argBR (best of) = 1.5", showarrow=False, font=dict(size=16))

# Update axes and layout for the plot
fig.update_layout(
    xaxis_title='ADP1 with CGP from Phase 1 (OD600)',
    yaxis_title='ADP1 in Phase 2 (OD600)',
    xaxis=dict(range=[0, 1]),
    yaxis=dict(range=[0, 1]),
    showlegend=False,
    width=700,
    height=700,
    plot_bgcolor='rgba(0,0,0,0)'
)

fig

NameError: name 'plotly_colors' is not defined

In [29]:
fig.write_image('/Users/pasha/src/nu/posters/artifacts/yield_cgp_for_arg.pdf')
