In [1]:
import pickle

import altair as alt

import pandas as pd

import polyclonal

import warnings
# Suppress every warning for the rest of the kernel session (no category or
# module filter is given, so nothing is exempt).
# NOTE(review): this also hides deprecation / data-quality warnings from
# pandas, scipy and altair — consider narrowing the filter.
warnings.filterwarnings('ignore')

import altair_saver
import scipy as sp

import os

In [2]:
# Move three directories up so that the relative paths used in this notebook
# ('experiments/...', 'results/...', 'scratch_notebooks/...') resolve.
# Presumably this lands on the project root — verify against where the
# notebook is stored.
#
# The jump is guarded so the cell is safe to re-run: once the working
# directory already contains the 'experiments' and 'results' folders the
# notebook reads from, it is left alone instead of climbing three more levels.
if not (os.path.isdir('experiments') and os.path.isdir('results')):
    os.chdir('../../../')

In [20]:
# Measured IC50 fold changes from the validation assays.
ic50s_all = pd.read_csv('experiments/validations/230909_3944-2365_fold_changes.csv')

# Keep only rows for serum 2365, excluding the 'T160K-Y159N' variant.
is_serum_2365 = ic50s_all['serum'] == 2365
is_not_excluded_variant = ic50s_all['variant'] != 'T160K-Y159N'
ic50s = ic50s_all.loc[is_serum_2365 & is_not_excluded_variant]
ic50s

Unnamed: 0,serum,variant,ic50,log2_fold_change_ic50
7,2365,Y159N,0.000566,1.15878
8,2365,T160K,0.000269,0.08919
10,2365,S124R,0.00044,0.795738
11,2365,K189E,0.001163,2.199497
12,2365,S193D,0.000917,1.856389
13,2365,S145H,0.000206,-0.300784


In [21]:
# Load the escape predictions from the `2365_avg.csv` results file and give
# each row a `variant` label (wildtype + site + mutant, e.g. "Y159N") so it can
# be joined to the measured IC50 table; the three source columns are dropped.
#
# An alternative explored earlier was to load a single polyclonal fit pickle
# from results/polyclonal_fits/ and use `model[0].mut_escape_df` instead.
model_prediction_df = (
    pd.read_csv('results/antibody_escape/2365_avg.csv')
    .assign(
        variant=lambda df: df["wildtype"] + df["site"].astype(str) + df["mutant"]
    )
    .drop(columns=["wildtype", "site", "mutant"])
)

# Attach the model predictions to the measured IC50s. The left join keeps
# every measured variant; `validate` raises if `variant` is duplicated on
# either side.
# NOTE(review): `.fillna(0)` replaces every NaN in the merged frame with 0, so
# a measured variant with no prediction row is treated as zero escape —
# confirm that is intended before it feeds the correlation.
validation_vs_prediction = ic50s.merge(
    model_prediction_df,
    on=["variant"],
    how="left",
    validate="one_to_one",
).fillna(0)

validation_vs_prediction

Unnamed: 0,serum,variant,ic50,log2_fold_change_ic50,epitope,mutation,escape_mean,escape_median,escape_min_magnitude,escape_std,n_models,times_seen,frac_models
0,2365,Y159N,0.000566,1.15878,1,Y159N,0.3106,0.3106,0.2507,0.0846,2,122.5,1.0
1,2365,T160K,0.000269,0.08919,1,T160K,0.0047,0.0047,-0.0099,0.0206,2,186.5,1.0
2,2365,S124R,0.00044,0.795738,1,S124R,0.4245,0.4245,0.3866,0.0535,2,55.0,1.0
3,2365,K189E,0.001163,2.199497,1,K189E,0.781,0.781,0.6504,0.1848,2,47.5,1.0
4,2365,S193D,0.000917,1.856389,1,S193D,0.5509,0.5509,0.4033,0.2087,2,64.5,1.0
5,2365,S145H,0.000206,-0.300784,1,S145H,-0.0419,-0.0419,0.0147,0.08,2,10.0,1.0


In [22]:
# Pearson correlation between the measured log2 IC50 fold changes and the
# median predicted escape scores.
measured_fold_change = validation_vs_prediction["log2_fold_change_ic50"]
predicted_escape = validation_vs_prediction["escape_median"]

r, p = sp.stats.pearsonr(measured_fold_change, predicted_escape)
r_squared = r**2

print(f"R={r}")
print(f"R^2={r_squared}")

R=0.9610940772433635
R^2=0.9237018253122723


In [23]:
# Round the correlation coefficient to three decimal places for the on-plot label.
r = round(r, 3)

# Axis encodings: measured fold change on x (symlog scale), predicted escape on y.
x_encoding = alt.X(
    'log2_fold_change_ic50',
    title='log2 fold change IC50',
    scale=alt.Scale(type="symlog"),
)
y_encoding = alt.Y('escape_median', title='predicted escape score')

# Scatter layer: one large circle per validated variant.
corrs = (
    alt.Chart(validation_vs_prediction)
    .mark_circle(size=400)
    .encode(x=x_encoding, y=y_encoding, tooltip=['variant'])
    .properties(title='Teenager 2365')
)

# Variant labels: same encodings as the scatter, drawn as offset text.
text = corrs.mark_text(
    align='center', baseline='top', fontSize=12, dx=7, dy=12,
).encode(text='variant')

# Correlation annotation, positioned with constant (alt.value) x/y rather
# than data coordinates.
text_2 = (
    alt.Chart()
    .mark_text(align='left', baseline='bottom', fontSize=14, fontWeight=300)
    .encode(
        x=alt.value(10),
        y=alt.value(20),
        text=alt.value(["R-value: ", f'{r}']),
    )
)

# Faint gray reference rules through zero: `x_axis` is the horizontal rule at
# y = 0 and `y_axis` is the vertical rule at x = 0.
rule_style = dict(size=1, opacity=0.5, color='gray')
x_axis = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(**rule_style).encode(y='y')
y_axis = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule(**rule_style).encode(x='x')

# Layer everything and apply the shared axis / title styling.
layered = corrs + text + text_2 + x_axis + y_axis
chart = (
    layered
    .configure_axis(grid=False, labelFontSize=14, titleFontSize=15)
    .configure_title(fontSize=21, fontWeight='normal')
)

# Write the figure to disk at 2x scale, then display it as the cell output.
chart.save(
    'scratch_notebooks/figure_drafts/validation_correlations/230927_2365_scatter.png',
    scale_factor=2.0,
)

chart