# Generate a file with all age cohort escape data from A/Hong Kong/45/2019 H3 HA
This file is filtered using the following parameters, specified in `data/polyclonal_config.yaml`:
* Functional effect threshold (-1.38)
* Minimum times seen (n=3)
* Allowed amino acids (all except stop codons)

In [None]:
import altair as alt

import pandas as pd

import polyclonal

import yaml

Read the config file and get the filtering parameters

In [None]:
# Load the default plot settings from the polyclonal config. The encoding is
# passed explicitly so the YAML file is decoded the same way on every
# platform instead of falling back to the locale's default encoding.
with open('data/polyclonal_config.yaml', encoding='utf-8') as f:
    config = yaml.safe_load(f)['overall_default']['plot_kwargs']

# Filtering parameters pulled out of the config:
#   func_effect - value stored under 'functional effect' in the slider stats
#   times_seen  - value stored under 'times_seen' in the slider stats
#   aa_list     - the configured alphabet of allowed mutant characters
func_effect = config['addtl_slider_stats']['functional effect']
times_seen = config['addtl_slider_stats']['times_seen']
aa_list = config['alphabet']

Get functional effects

In [None]:
# Load the per-mutation functional effects and reduce the table to the three
# columns needed downstream: the join keys (site, mutant) and the effect.
muteffects_csv = "results/muteffects_functional/muteffects_observed.csv"

muteffects = pd.read_csv(muteffects_csv)
# Rename so the column names line up with the escape tables merged later.
muteffects = muteffects.rename(
    columns={"reference_site": "site", "effect": "functional effect"}
)
muteffects = muteffects[["site", "mutant", "functional effect"]]

Define samples in each age cohort

In [None]:
# Map each age cohort label to the list of serum IDs in that cohort.
# IDs are kept as strings; each list is written as one whitespace-separated
# string and split, which yields the same list of IDs in the same order.
cohort_dict = {
    '2-5_years': '3944 2389 2323 2388 3973 4299 4584 2367'.split(),
    '15-20_years': '2350 2365 2382 3866 2380 3856 3857 3862'.split(),
    '40-45_years': '33C 34C 197C 199C 215C 210C 74C 68C 150C 18C'.split(),
}

Read the library-averaged escape data for each serum, filter by the parameters defined above, and combine the results into a single summary escape dataframe.

In [None]:
# Build one filtered escape table per serum, label it with its serum ID and
# age cohort, then stack all of them into a single summary dataframe.
escape_df_list = []

for cohort, serum_list in cohort_dict.items():
    for serum in serum_list:
        df = (
            pd.read_csv(f'results/antibody_escape/{serum}_avg.csv')
            # keep mutations observed at least the configured number of times
            .query("`times_seen` >= @times_seen")
            # keep only mutant characters in the configured alphabet
            .query("`mutant` in @aa_list")
            # left merge keeps every escape row; rows with no matching
            # functional effect get NaN, which fails the comparison in the
            # next query and is therefore filtered out there
            .merge(muteffects, how='left', on=['site', 'mutant'])
            .query("`functional effect` >= @func_effect")
            # drop extraneous columns
            .drop(columns=['epitope', 'escape_median', 'escape_min_magnitude'])
            # assign() returns a new frame with the label columns appended,
            # rather than mutating the filtered intermediate frame in place
            .assign(serum=serum, cohort=cohort)
        )
        escape_df_list.append(df)

# ignore_index=True gives the combined table a fresh 0..N-1 index; without it
# each per-serum frame keeps its own row labels and the result has duplicates.
escape_df = pd.concat(escape_df_list, ignore_index=True)

In [None]:
# Write the combined escape table to disk. index=False leaves the dataframe's
# row index out of the CSV.
output_csv = 'results/full_hk19_escape_scores.csv'
print(f'Writing to {output_csv}')
escape_df.to_csv(path_or_buf=output_csv, index=False)

# Bare trailing expression: shown as the cell output when run in a notebook.
escape_df