In [1]:
import altair as alt

import pandas as pd

import polyclonal
import yaml

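Read the data. First change the working directory to the parent directory, which the relative `data/` and `results/` paths below are resolved against: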

In [2]:
import os

# Move to the parent directory so that the relative `data/` and `results/`
# paths used by the following cells resolve.
# The move is guarded so that re-running this cell is harmless: if the config
# file is already reachable from the current working directory we are already
# in the right place, and climbing another level would break every path below.
if not os.path.exists('data/polyclonal_config.yaml'):
    os.chdir('../')

Get config parameters

In [3]:
# Parse the polyclonal config file and keep the default plotting options.
with open('data/polyclonal_config.yaml') as config_file:
    polyclonal_config = yaml.safe_load(config_file)
config = polyclonal_config['overall_default']['plot_kwargs']

# Values used by the filtering steps further down: the two slider settings
# act as minimum thresholds and the alphabet restricts the allowed mutants.
slider_stats = config['addtl_slider_stats']
func_effect = slider_stats['functional effect']
times_seen = slider_stats['times_seen']
aa_list = config['alphabet']

Get functional effects

In [4]:
# Load the functional effects table, rename its columns to the names used for
# merging later (`site`, `functional effect`) and keep only the needed columns.
muteffects_csv = "results/muteffects_functional/muteffects_observed.csv"

muteffects_columns = ["site", "mutant", "functional effect"]
muteffects = (
    pd.read_csv(muteffects_csv)
    .rename(columns={"reference_site": "site", "effect": "functional effect"})
    .loc[:, muteffects_columns]
)

In [5]:
# Serum identifiers grouped by cohort label; each identifier is used later to
# locate that serum's `<serum>_avg.csv` escape file.
cohort_dict = {
    '2-5_years': ['3944', '2389', '2323', '2388', '3973', '4299', '4584', '2367'],
    '15-20_years': ['2350', '2365', '2382', '3866', '2380', '3856', '3857', '3862'],
    '40-45_years': [
        '33C', '34C', '197C', '199C', '215C', '210C', '74C', '68C', '150C', '18C',
    ],
}

In [6]:
# Build one filtered escape table per serum, tag it with its serum and cohort,
# and stack all of them into a single DataFrame.
escape_df_list = []

for cohort, serum_list in cohort_dict.items():
    for serum in serum_list:
        df = (
            pd.read_csv(f'results/antibody_escape/{serum}_avg.csv')
            # keep mutations seen at least `times_seen` times and whose mutant
            # character is in the configured alphabet
            .query("`times_seen` >= @times_seen")
            .query("`mutant` in @aa_list")
            # the left merge keeps every escape row; rows without a matching
            # functional effect get NaN and are then dropped by the threshold
            # filter below, because a NaN comparison evaluates to False
            .merge(muteffects, how='left', on=['site', 'mutant'])
            .query("`functional effect` >= @func_effect")
            # add the labels with assign() inside the chain: assigning columns
            # on the query-filtered frame afterwards raises
            # SettingWithCopyWarning in pandas without copy-on-write
            .assign(serum=serum, cohort=cohort)
        )

        escape_df_list.append(df)

escape_df = pd.concat(escape_df_list)
escape_df.head()

Unnamed: 0,epitope,site,wildtype,mutant,mutation,escape_mean,escape_median,escape_min_magnitude,escape_std,n_models,times_seen,frac_models,functional effect,serum,cohort
0,1,-2,D,G,D-2G,0.1278,0.1278,0.0944,0.0473,2,3.0,1.0,-0.6583,3944,2-5_years
1,1,-2,D,Y,D-2Y,0.0338,0.0338,-0.0017,0.0501,2,7.0,1.0,-0.644,3944,2-5_years
2,1,1,Q,H,Q1H,0.0069,0.0069,-0.1341,0.1994,2,3.0,1.0,-0.1601,3944,2-5_years
3,1,1,Q,R,Q1R,-0.0235,-0.0235,0.0545,0.1103,2,5.0,1.0,-0.6362,3944,2-5_years
4,1,2,K,N,K2N,-0.0178,-0.0178,0.0521,0.099,2,5.0,1.0,-0.1545,3944,2-5_years


In [9]:
# Write the combined escape table to disk, leaving out the DataFrame index.
full_escape_csv = 'results/full_hk19_escape_scores.csv'
escape_df.to_csv(full_escape_csv, index=False)