In [1]:
import pickle

import altair as alt

import pandas as pd

import polyclonal

import warnings
warnings.filterwarnings('ignore')

from IPython.utils import io

In [2]:
import os

# Move two directories up from where the kernel started, so that relative
# paths used later in the notebook (e.g. "results/...") resolve.
# The starting directory is remembered the first time this cell runs; without
# that, every re-run of the cell would climb two *more* levels and silently
# break all later relative paths.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "..", ".."))

In [3]:
# set up function for mean prob escape chart to avoid clutter from large block of code

def plot_avg_escape(prob_escape, max_aa_subs=4):
    """Chart mean escape probability against antibody concentration.

    Rows are grouped by antibody concentration and by number of amino-acid
    substitutions, where the substitution count is the number of
    whitespace-separated tokens in ``aa_substitutions_reference``. Counts of
    ``max_aa_subs`` or more are pooled into one group labelled
    ``">{max_aa_subs - 1}"``. The means of ``prob_escape`` and
    ``prob_escape_uncensored`` are drawn in two side-by-side panels.

    Parameters
    ----------
    prob_escape : pandas.DataFrame
        Must contain the columns ``aa_substitutions_reference`` (strings),
        ``antibody_concentration``, ``prob_escape`` and
        ``prob_escape_uncensored``.
    max_aa_subs : int, default 4
        Substitution counts at or above this value are pooled together.

    Returns
    -------
    Altair chart
        Line chart (with point markers) faceted into columns by whether the
        plotted mean is the censored or the uncensored escape probability.

    Raises
    ------
    ValueError
        If ``max_aa_subs`` is less than 1.
    """
    if max_aa_subs < 1:
        raise ValueError(f"max_aa_subs must be >= 1, got {max_aa_subs}")

    pooled_label = f">{max_aa_subs - 1}"

    mean_prob_escape = (
        prob_escape.assign(
            n_subs=lambda x: (
                x["aa_substitutions_reference"]
                .str.split()
                .map(len)
                # clip first so every count >= max_aa_subs maps to one label
                .clip(upper=max_aa_subs)
                .map(lambda n: str(n) if n < max_aa_subs else pooled_label)
            )
        )
        .groupby(["antibody_concentration", "n_subs"], as_index=False)
        .aggregate({"prob_escape": "mean", "prob_escape_uncensored": "mean"})
        .rename(
            columns={
                "prob_escape": "censored to [0, 1]",
                "prob_escape_uncensored": "not censored",
            }
        )
        # long format: one row per (concentration, n_subs, censored-or-not)
        .melt(
            id_vars=["antibody_concentration", "n_subs"],
            var_name="censored",
            value_name="probability escape",
        )
    )

    mean_prob_escape_chart = (
        alt.Chart(mean_prob_escape)
        .encode(
            x=alt.X("antibody_concentration"),
            y=alt.Y(
                "probability escape",
                scale=alt.Scale(type="symlog", constant=0.05),
            ),
            column=alt.Column("censored", title=None),
            color=alt.Color("n_subs", title="n substitutions"),
            # float columns get a compact numeric format in the tooltip
            tooltip=[
                alt.Tooltip(c, format=".3g") if mean_prob_escape[c].dtype == float else c
                for c in mean_prob_escape.columns
            ],
        )
        .mark_line(point=True, size=0.5)
        .properties(width=200, height=125)
        .configure_axis(grid=False)
    )

    return mean_prob_escape_chart

In [4]:
def generate_model(
    prob_escape_df,
    n_epitopes=1,
    reg_escape_weight=0.1,
    times_seen=3,
):
    """Fit a ``polyclonal.Polyclonal`` model and return its mutation-escape plot.

    Despite the function's name, the fitted model object itself is not
    returned; only the plot produced by ``model.mut_escape_plot`` is.

    Parameters
    ----------
    prob_escape_df : pandas.DataFrame
        Data to fit. Its ``antibody_concentration`` and
        ``aa_substitutions_reference`` columns are renamed to
        ``concentration`` and ``aa_substitutions`` before being handed to
        ``polyclonal.Polyclonal``.
    n_epitopes : int, default 1
        Passed to ``polyclonal.Polyclonal`` as ``n_epitopes``.
    reg_escape_weight : float, default 0.1
        Passed to ``model.fit`` as ``reg_escape_weight``.
    times_seen : int, default 3
        Value for the ``times_seen`` entry of ``addtl_slider_stats`` passed to
        ``model.mut_escape_plot``.

    Returns
    -------
    The object returned by ``model.mut_escape_plot``.
    """
    model = polyclonal.Polyclonal(
        n_epitopes=n_epitopes,
        data_to_fit=prob_escape_df.rename(
            columns={
                "antibody_concentration": "concentration",
                "aa_substitutions_reference": "aa_substitutions",
            }
        ),
        alphabet=polyclonal.AAS_WITHSTOP_WITHGAP,
    )

    # fit model, suppressing output text to avoid clutter in notebook
    with io.capture_output():
        model.fit(
            logfreq=200,
            reg_escape_weight=reg_escape_weight,
        )

    return model.mut_escape_plot(
        addtl_slider_stats={"times_seen": times_seen},
        init_floor_at_zero=False,
    )

In [6]:
# Relative path: resolved against the current working directory.
prob_escape_csv = "results/prob_escape/libA_221027_1_3x-1C04_5G04_1_prob_escape.csv"

# Default NA parsing is switched off and "nan" is the only configured NA
# marker -- presumably so empty strings are kept as-is; confirm against the
# pandas read_csv documentation.
prob_escape_unfiltered = pd.read_csv(
    prob_escape_csv,
    keep_default_na=False,
    na_values="nan",
)

# filter for those with sufficient no-antibody counts: each row's
# `no-antibody_count` is compared with that row's threshold column
prob_escape = prob_escape_unfiltered.query(
    "`no-antibody_count` >= no_antibody_count_threshold"
)
# NOTE(review): the null check below is disabled in the original; confirm
# whether it should be restored or removed.
# assert prob_escape.notnull().all().all()
prob_escape.head()

Unnamed: 0,library,antibody_sample,no-antibody_sample,aa_substitutions_sequential,n_aa_substitutions,barcode,prob_escape,prob_escape_uncensored,antibody_count,no-antibody_count,antibody_neut_standard_count,no-antibody_neut_standard_count,no_antibody_count_threshold,antibody_count_threshold,aa_substitutions_reference,retain,antibody,antibody_concentration
0,libA,221027_1_antibody_3x-1C04_5G04_14.67_1,221027_1_no-antibody_control_1,K297I,1,ATAACACAAAAAAGTA,0.0003,0.0003,47517,408660,2289110,6257,33,,K278I,True,3x-1C04_5G04,14.67
1,libA,221027_1_antibody_3x-1C04_5G04_14.67_1,221027_1_no-antibody_control_1,R111S V366M R402S,3,TATCTACCTAACGAAA,0.0003,0.0003,15232,143540,2289110,6257,33,,R92S V347M R383S,True,3x-1C04_5G04,14.67
2,libA,221027_1_antibody_3x-1C04_5G04_14.67_1,221027_1_no-antibody_control_1,D209A,1,CCTTAGTGTAATAAAA,0.0005,0.0005,14524,85923,2289110,6257,33,,D190A,True,3x-1C04_5G04,14.67
3,libA,221027_1_antibody_3x-1C04_5G04_14.67_1,221027_1_no-antibody_control_1,K46A I301V E409M,3,AAAAATCTGAGACAAA,0.0003,0.0003,13150,113350,2289110,6257,33,,K27A I282V E390M,True,3x-1C04_5G04,14.67
4,libA,221027_1_antibody_3x-1C04_5G04_14.67_1,221027_1_no-antibody_control_1,L89I L263H Q520R,3,CTCTTTAAAATCCATT,0.0004,0.0004,12801,85009,2289110,6257,33,,L70I L244H Q501R,True,3x-1C04_5G04,14.67


In [8]:
# Mean escape probability vs. antibody concentration, using every row of prob_escape.
plot_avg_escape(prob_escape)

In [9]:
# Keep only rows whose antibody concentration is below 4, then redraw the
# mean-escape chart on that subset.
filt = prob_escape.query("antibody_concentration < 4")
plot_avg_escape(filt)

In [None]:
generate_m