In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pint import UnitRegistry
from scipy.stats.mstats import hmean

from analysis.util import dedup_trace_legends

In [None]:
# One shared registry: pint quantities from different registries cannot be combined.
units = UnitRegistry()

# noinspection PyTypeChecker
df_od = (
    pd.read_excel(
        # Fixed a stray doubled separator ('04 Raw Data//EXP_...') in this path.
        '~/tyo_lab_pk/04 Raw Data/EXP_0007_PK_20240909_ArgConcentrationResponse.xlsx',
        sheet_name='Day 3 - 5 OD tracking',
        usecols='A:L',
        skiprows=3,
    )
    # The text before the first '•' in `condition` is parsed as a pint quantity
    # and stored as the arginine concentration.
    .assign(arg_conc=lambda df: df.condition.str.split('•').str.get(0).apply(units.parse_expression))
    # Plain-float copy of the concentration in micromolar for plotting/filtering.
    # NOTE: Python NFKC-normalizes identifiers, so a keyword spelled with
    # MICRO SIGN (U+00B5) yields a column named with GREEK SMALL LETTER MU
    # (U+03BC). Attribute access works with either spelling, but *string*
    # lookups of this column must use U+03BC.
    .assign(arg_conc_µM=lambda df: df.arg_conc.apply(lambda qty: qty.to('micromolar').magnitude))
)
df_od

In [None]:
# Harmonic mean OD per (time, strain, condition) for the overlay lines.
# The figure title advertises harmonic means and `max_ods` picks peak time
# points by harmonic mean, so the overlay uses `hmean` as well (it previously
# used the arithmetic `.mean()`, contradicting the title).
# sort=False keeps groups in first-appearance order — presumably so the line
# facets line up with the box facets; TODO confirm.
df_od_hmeans = (
    df_od
    .groupby(['elapsed_time_hr', 'strain_id', 'condition'], sort=False)
    .od_sample.apply(hmean)
    .reset_index()
)

px.box(
    df_od,
    x='elapsed_time_hr',
    y='od_sample',
    color='strain_id',
    facet_col='condition',
    points='all',  # Show all points
    title='OD ADP1 Strains and Conditions',  # replaced by the layout title set below
    labels={
        'od_sample': 'OD',
        'strain_id': 'ADP1 Strain',
        'elapsed_time_hr': 'Elapsed Time (hr)',
    },
).add_traces(
    # Overlay one harmonic-mean line per strain in every condition facet.
    px.line(
        df_od_hmeans,
        x='elapsed_time_hr',
        y='od_sample',
        color='strain_id',
        facet_col='condition',
        markers=False,
    ).data
).update_layout(
    title='OD ADP1 {KSF230 ∆argB, KSF? ∆argB ∆argR, KSF001 wt} | growth in dosed Arg after preculture and wash (harmonic means)',
    title_x=0.5,
    showlegend=True,
    boxmode='group',
    boxgap=.8,  # Gap between boxes in the same group (0.5 makes them narrower)
    boxgroupgap=0,  # Gap between different groups of boxes
    height=600,
).for_each_trace(dedup_trace_legends())

In [None]:
def max_ods(df_od: pd.DataFrame, strains: list[str]) -> pd.DataFrame:
    """Return the raw OD rows taken at each strain/condition's peak time point.

    The peak time point is the ``elapsed_time_hr`` whose replicates have the
    largest harmonic-mean ``od_sample`` within a (strain_id, condition) pair.

    Args:
        df_od: Frame with ``strain_id``, ``condition``, ``elapsed_time_hr``,
            ``od_sample`` and ``arg_conc`` columns.
        strains: ``strain_id`` values to keep.

    Returns:
        The matching rows sorted by ``arg_conc``. Because the merge leaves two
        ``od_sample`` columns, pandas' default suffixes apply:
        ``od_sample_x`` holds the per-replicate OD and ``od_sample_y`` the
        harmonic mean of the peak time point.
    """
    pair_keys = ['strain_id', 'condition']
    timepoint_keys = pair_keys + ['elapsed_time_hr']

    selected = df_od[df_od.strain_id.isin(strains)]

    # Harmonic mean OD of the replicates at every time point.
    hmeans = selected.groupby(timepoint_keys).od_sample.apply(hmean).reset_index()

    # Descending sort puts the largest mean first within each pair, so keeping
    # the first row per pair selects the peak time point.
    peaks = (
        hmeans
        .sort_values(by=pair_keys + ['od_sample'], ascending=False)
        .drop_duplicates(subset=pair_keys)
    )

    at_peak = selected.merge(peaks, on=timepoint_keys, sort=False)
    return at_peak.sort_values(by='arg_conc')

In [None]:
def od_by_arg_conc(
        arg_conc,
        od600_to_cdw=.33 * units.g / units.L,
        perc_cgp_of_cdw=.44,
        perc_arg_of_cgp=.3,
        mw_arg=174.2 * units.g / units.mol
) -> 'pint.Quantity':
    """Return the OD whose CGP-bound arginine equals ``arg_conc``.

    Computes ``arg_conc / (od600_to_cdw * perc_cgp_of_cdw * perc_arg_of_cgp / mw_arg)``,
    i.e. the concentration divided by the molar arginine content per OD unit.

    Args:
        arg_conc: Arginine concentration as a pint quantity (callers pass µM).
        od600_to_cdw: Cell dry weight per OD unit (mass per volume).
        perc_cgp_of_cdw: Mass fraction of CDW that is CGP, going by the
            parameter name. Despite "perc" this is a 0-1 fraction.
        perc_arg_of_cgp: Mass fraction of CGP that is arginine (0-1 fraction).
        mw_arg: Molar mass of arginine.

    Returns:
        A pint quantity reduced to base units (not a bare float, as the
        previous ``-> float`` annotation claimed); read the number with
        ``.magnitude``. With the default units the result is dimensionless.
    """
    arg_per_od = od600_to_cdw * perc_cgp_of_cdw * perc_arg_of_cgp / mw_arg
    return (arg_conc / arg_per_od).to_base_units()


# OD predicted by `od_by_arg_conc` at the two x-axis end points; the relation
# is a straight line through the origin, so two points describe it.
df_cgp_arg = (
    pd.DataFrame(data=dict(arg_conc_µM=[0, 5000]))
    .pipe(
        lambda frame: frame.assign(
            od=frame.arg_conc_µM.map(lambda conc: od_by_arg_conc(conc * units.uM).magnitude)
        )
    )
)

In [None]:
def max_od_by_arg_conc(
        arg_conc,
        od600_to_cdw=.33 * units.g / units.L,
        perc_arg_of_cdw=.025,  # (1prot/2cdw)(1arg/20prot) = 1/40
        mw_arg=174.2 * units.g / units.mol
) -> 'pint.Quantity':
    """Return the OD whose total biomass arginine content equals ``arg_conc``.

    Computes ``arg_conc / (od600_to_cdw * perc_arg_of_cdw / mw_arg)``, i.e. the
    concentration divided by the molar arginine content per OD unit.

    Args:
        arg_conc: Arginine concentration as a pint quantity (callers pass µM).
        od600_to_cdw: Cell dry weight per OD unit (mass per volume).
        perc_arg_of_cdw: Mass fraction of CDW that is arginine. Despite
            "perc" this is a 0-1 fraction (default 1/40).
        mw_arg: Molar mass of arginine.

    Returns:
        A pint quantity reduced to base units (not a bare float, as the
        previous ``-> float`` annotation claimed); read the number with
        ``.magnitude``. With the default units the result is dimensionless.
    """
    arg_per_od = od600_to_cdw * perc_arg_of_cdw / mw_arg
    return (arg_conc / arg_per_od).to_base_units()


# OD ceiling predicted by `max_od_by_arg_conc` at a few arginine doses (µM).
df_arg_cdw = (
    pd.DataFrame(data=dict(arg_conc_µM=[0, 250, 1000]))
    .pipe(
        lambda frame: frame.assign(
            od=frame.arg_conc_µM.map(lambda conc: max_od_by_arg_conc(conc * units.uM).magnitude)
        )
    )
)
df_arg_cdw

In [None]:
# Replicate ODs at each strain/condition's peak time point.
# NOTE(review): despite the `_sans_0uM` name nothing is filtered out here —
# the concentration window below is disabled.
df_max_ods_sans_0uM = max_ods(df_od, ['KSF230', '∆argR'])
# .loc[lambda df: (0 < df.arg_conc_µM) & (df.arg_conc_µM <= 500)]

# Column-name *strings* below are spelled with GREEK SMALL LETTER MU (U+03BC).
# Identifiers are NFKC-normalized by Python, so attribute access works with
# either mu, but string lookups must use U+03BC.
fig_yield = px.scatter(
    df_max_ods_sans_0uM,
    x='arg_conc_μM',
    y='od_sample_x',  # per-replicate OD (merge suffix produced by `max_ods`)
    color='strain_id',
    trendline='ols',
    labels={
        'strain_id': 'ADP1 Strain',
        'od_sample_x': 'OD',
        'arg_conc_μM': '[Arginine] (µM)'
    },
)
fig_yield.update_layout(
    title='ADP1 {KSF230 ∆argB, KSF? ∆argB ∆argR} Biomass Yield Curves',
    title_x=0.5,
)

# Reference line computed in `df_cgp_arg`.
cgp_constraint = go.Scatter(
    x=df_cgp_arg.arg_conc_μM, y=df_cgp_arg.od,
    name='[Arg] from BM w/CGP',
    mode='lines', line=dict(dash='dot', color='darkgray'),
)
# Reference line computed in `df_arg_cdw`.
cdw_constraint = go.Scatter(
    x=df_arg_cdw.arg_conc_μM, y=df_arg_cdw.od,
    name='ODmax by 2.5% Arg',
    mode='lines', line=dict(dash='dash'),
)
fig_yield.add_traces([cgp_constraint, cdw_constraint])

# One fit-results object per strain, indexed by the legend label.
yield_fits = px.get_trendline_results(fig_yield).set_index('ADP1 Strain').px_fit_results

# NOTE(review): the file name spells "Aginine" (sic); left unchanged so any
# existing references to this figure keep working.
fig_yield.write_image('../../figures/pk_exp007_AginineTitrationResponse.eps')
fig_yield

In [None]:
# Show the summary of the trendline fit stored under 'KSF230' in `yield_fits`.
yield_fits['KSF230'].summary()

In [None]:
def yield_in_g_g(x, od600_to_cdw=.33, mw_arg=174.2):
    """Scale a yield-curve slope into a g/g yield.

    Computes ``x * od600_to_cdw / (mw_arg * 1e-6)``. The defaults reproduce the
    previously hard-coded ``.33`` and ``1.742e-4``.

    Args:
        x: Slope to convert; a scalar or anything supporting ``*`` and ``/``.
            Assumes OD per µM arginine — TODO confirm.
        od600_to_cdw: Cell dry weight per OD unit, presumably in g/L.
        mw_arg: Molar mass of arginine, presumably in g/mol; the ``1e-6``
            factor rescales it to match a per-µM slope.

    Returns:
        The scaled slope — presumably g CDW per g arginine, going by the
        function name.
    """
    g_arg_per_litre_per_uM = mw_arg * 1e-6
    return x * od600_to_cdw / g_arg_per_litre_per_uM

# Keep the yield as the cell's last expression so it is actually displayed;
# previously a leftover `dir(...)` debugging line followed it and hid the value.
# NOTE(review): 0.0004 is presumably the KSF230 trendline slope read off the
# fit summary — TODO confirm, or derive it from `yield_fits['KSF230']` instead
# of hard-coding it.
yield_in_g_g(0.0004)

In [None]:
import pandera as pa
import pandera.typing as pat
from analysis.schemas import SakaguchiODSchema

@pa.check_types()
def sakaguchi_analysis(df: pat.DataFrame[SakaguchiODSchema]):
    """Return ``df`` unchanged.

    The body is a pass-through; the only other work attached to a call comes
    from the ``pa.check_types`` decorator combined with the
    ``SakaguchiODSchema`` annotation on ``df``.
    NOTE(review): presumably that validates ``df`` against the schema and
    raises on a mismatch — confirm against the pandera documentation.
    """
    return df

In [None]:
# Where the Sakaguchi table sits inside the workbook.
sakaguchi_sheet = dict(
    sheet_name='Sakaguchi for residual arg',
    usecols='B:E',
    skiprows=45,
)

# noinspection PyTypeChecker
df_sakaguchi = pd.read_excel(
    '~/tyo_lab_pk/04 Raw Data/EXP_0007_PK_20240909_ArgConcentrationResponse.xlsx',
    **sakaguchi_sheet,
)

# Pass the frame through the schema-annotated helper and display the result.
sakaguchi_analysis(df_sakaguchi)