In [8]:
import pandas as pd

import dms_variants
import dms_variants.codonvarianttable
from dms_variants.constants import CBPALETTE
import dms_variants.plotnine_themes
import neutcurve
from neutcurve.colorschemes import CBMARKERS, CBPALETTE

import matplotlib
from matplotlib import pyplot as plt

import warnings
warnings.filterwarnings("ignore")

from plotnine import *

# _ = theme_set(dms_variants.plotnine_themes.theme_graygrid())

### Get single tidy df for neut runs for <5yo sera

In [3]:
# Load the finalized neut runs first; these samples are later removed from
# the rough-run data so that the finalized measurements take precedence.
final_1, final_2 = map(
    pd.read_csv,
    ('neut_data/230212_neuts_tidy.csv', 'neut_data/230214_neuts_tidy.csv'),
)

# Stack both finalized runs into one tidy frame (row indices are kept as read).
finalized_ped_neuts = pd.concat([final_1, final_2])

In [4]:
rough_run1 = pd.read_csv('neut_data/230208_neuts_tidy_PBS.csv')

# This run also included 1C04: drop those rows, then cast the remaining serum
# IDs to int. Done as one chain so we never assign into a filtered slice
# (the pattern that raises SettingWithCopyWarning, which this notebook's
# blanket warnings filter would otherwise hide).
rough_run2 = (
    pd.read_csv('neut_data/230210_neuts_tidy.csv')
    .loc[lambda df: df['serum'] != '1C04']
    .assign(serum=lambda df: df['serum'].astype(int))
)

# Stack both rough runs into one tidy frame.
rough_ped_neuts = pd.concat([rough_run1, rough_run2])

In [5]:
# Outer-merge finalized (left, `_x`) and rough (right, `_y`) runs on the sample
# keys, then keep a single 'fraction infectivity' column:
#   - rows present only in the rough runs use the rough value (`_y`)
#   - all other rows (finalized only, or present in both) use the finalized
#     value (`_x`), so rough duplicates of finalized samples are discarded.
# The selection is vectorized with Series.where instead of a row-wise apply.
full_neuts = (
    finalized_ped_neuts
    .merge(
        rough_ped_neuts,
        on=['serum', 'virus', 'replicate', 'concentration'],
        how='outer',
        indicator=True,
    )
    .assign(**{
        'fraction infectivity': lambda df: df['fraction infectivity_x'].where(
            df['_merge'] != 'right_only',
            df['fraction infectivity_y'],
        )
    })
    .drop(['fraction infectivity_x', 'fraction infectivity_y', '_merge'], axis=1)
    # restrict to the HK19 virus
    .loc[lambda df: df['virus'] == 'HK19']
)

full_neuts

Unnamed: 0,serum,virus,replicate,concentration,fraction infectivity
0,2388,HK19,1,0.050000,0.065399
1,2388,HK19,1,0.025000,0.033575
2,2388,HK19,1,0.012500,0.037981
3,2388,HK19,1,0.006250,0.097314
4,2388,HK19,1,0.003125,0.272056
...,...,...,...,...,...
668,3976,HK19,1,0.006250,1.141156
669,3976,HK19,1,0.003125,1.249474
670,3976,HK19,1,0.001563,1.154547
671,3976,HK19,1,0.000781,1.143886


In [6]:
# Load the consolidated tidy neut data for the <5yo sera from disk.
# NOTE(review): presumably a saved version of `full_neuts` from the merge above,
# but no visible cell writes this file — confirm its provenance.
neut_data_consolidated = pd.read_csv('neut_data/230309_consolidated_neuts_5yo.csv')

In [9]:
# Build the neutcurve CurveFits object from the consolidated tidy neut data.
fits = neutcurve.CurveFits(neut_data_consolidated)

In [10]:
# Tabulate fit parameters (including IC50) for the <5yo sera and derive the
# reciprocal titer. Built as one non-mutating chain so the cell is idempotent.
ic50s = (
    fits.fitParams(ics=[50])
    # reciprocal titer = 1 / IC50, computed column-wise instead of row by row
    .assign(reciprocal=lambda df: 1 / df['ic50'])
    .sort_values('reciprocal')
    .reset_index(drop=True)
    # serum IDs become strings so they act as labels; tag the cohort
    .assign(serum=lambda df: df['serum'].astype(str), age_group='0-5')
)

In [11]:
# Sera whose reciprocal IC50 is below 5000.
# NOTE(review): `ic50s_filt` is not used by the plot below, which draws the
# unfiltered `ic50s` — confirm which of the two was intended.
ic50s_filt = ic50s[ic50s['reciprocal'] < 5000]

# Reciprocal IC50 per serum (x ordered by titer) on a log10 y-axis.
p = ggplot(data=ic50s, mapping=aes(x='reorder(serum, reciprocal)', y='reciprocal'))
p = p + geom_point()
p = p + theme(figure_size=(9, 5), panel_grid_major_x=element_blank())
p = p + scale_y_log10()
# no `fill` aesthetic is mapped above, so this scale has nothing to act on
p = p + scale_fill_manual(values=CBPALETTE[1:])
p = p + labs(
    x='serum',
    y='reciprocal IC50',
    title='HK/19 neutralizing titers from children <5yo',
)

_ = p.draw()

In [12]:
# Fit neutralization curves for the teen sera and tabulate their IC50s.
neut_data_teens = pd.read_csv('neut_data/230303_neuts_tidy.csv')
fits_teens = neutcurve.CurveFits(neut_data_teens)

ic50s_teen = (
    fits_teens.fitParams(ics=[50])
    # reciprocal titer = 1 / IC50, computed column-wise instead of row by row
    .assign(reciprocal=lambda df: 1 / df['ic50'])
    .sort_values('reciprocal')
    .reset_index(drop=True)
    # serum IDs become strings so they act as labels; tag the cohort
    .assign(serum=lambda df: df['serum'].astype(str), age_group='15-18')
)

In [35]:
# Fit neutralization curves for the adult sera and tabulate their IC50s.
neut_data_adults = pd.read_csv('neut_data/230417_adult-neuts_tidy_edited.csv')
fits_adults = neutcurve.CurveFits(neut_data_adults)

ic50s_adults = (
    fits_adults.fitParams(ics=[50])
    # reciprocal titer = 1 / IC50, computed column-wise instead of row by row
    .assign(reciprocal=lambda df: 1 / df['ic50'])
    .sort_values('reciprocal')
    .reset_index(drop=True)
    # serum IDs become strings so they act as labels; tag the cohort
    .assign(serum=lambda df: df['serum'].astype(str), age_group='40-45')
)

In [36]:
# Stack the three per-cohort IC50 tables into one frame. Each table keeps its
# own 0-based row index, so index values repeat across cohorts.
ic50s_full = pd.concat((ic50s, ic50s_teen, ic50s_adults))
ic50s_full

Unnamed: 0,serum,virus,replicate,nreplicates,ic50,ic50_bound,ic50_str,midpoint,slope,top,bottom,reciprocal,age_group
0,4136,HK19,average,1,0.05,lower,>0.05,0.051,2.1,1,0,20,0-5
1,2320,HK19,average,1,0.05,lower,>0.05,0.055,22,1,0,20,0-5
2,4115,HK19,average,1,0.05,lower,>0.05,0.059,1.8,1,0,20,0-5
3,2322,HK19,average,1,0.05,lower,>0.05,0.48,10,1,0,20,0-5
4,4493,HK19,average,1,0.05,lower,>0.05,0.47,9.8,1,0,20,0-5
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,18C,HK19,average,1,0.0028,interpolated,0.00278,0.0028,1.9,1,0,3.6e+02,40-45
26,74C,HK19,average,1,0.0021,interpolated,0.00208,0.0021,1.7,1,0,4.8e+02,40-45
27,33C,HK19,average,1,0.0012,interpolated,0.00118,0.0012,1.6,1,0,8.5e+02,40-45
28,215C,HK19,average,1,0.00039,upper,<0.000391,0.00024,1.9,1,0,2.6e+03,40-45


In [37]:
# Serum IDs grouped by cohort.
peds = [3944, 2389, 2323, 2388, 3973, 4299, 4584, 2367]
teens = [2350, 2365, 2382, 3866, 2380, 3856, 3857, 3862]
adults = ['33C', '34C', '197C', '199C', '215C', '210C', '74C', '68C', '150C', '18C']
infant = [2462]

# IDs of the cohort to tabulate, as strings to match the `serum` column of
# `ic50s_full`. Currently the adult cohort; swap in another list above to
# inspect a different cohort.
mapped_samples = [str(serum) for serum in adults]

ic50s_mapped = ic50s_full.loc[ic50s_full['serum'].isin(mapped_samples)]

# Most potent sera (lowest IC50) first.
ic50s_mapped = ic50s_mapped.sort_values(['ic50'])

# Display with IC50 in scientific notation.
ic50s_mapped.style.format({'ic50': "{:.2E}"})

Unnamed: 0,serum,virus,replicate,nreplicates,ic50,ic50_bound,ic50_str,midpoint,slope,top,bottom,reciprocal,age_group
29,34C,HK19,average,1,0.000391,upper,<0.000391,0.000324,2.323503,1,0,2560.0,40-45
28,215C,HK19,average,1,0.000391,upper,<0.000391,0.000237,1.920958,1,0,2559.967232,40-45
27,33C,HK19,average,1,0.00118,interpolated,0.00118,0.00118,1.573096,1,0,847.495387,40-45
26,74C,HK19,average,1,0.00208,interpolated,0.00208,0.002076,1.720802,1,0,481.808605,40-45
25,18C,HK19,average,1,0.00278,interpolated,0.00278,0.002783,1.890333,1,0,359.379719,40-45
23,197C,HK19,average,1,0.00466,interpolated,0.00466,0.004663,2.72204,1,0,214.473317,40-45
21,199C,HK19,average,1,0.00508,interpolated,0.00508,0.005075,2.701512,1,0,197.0342,40-45
19,210C,HK19,average,1,0.0058,interpolated,0.0058,0.005798,2.895666,1,0,172.481539,40-45
14,150C,HK19,average,1,0.00642,interpolated,0.00642,0.006416,2.888437,1,0,155.859015,40-45
11,68C,HK19,average,1,0.00719,interpolated,0.00719,0.007191,3.14191,1,0,139.06694,40-45


In [40]:
# Fit the final adult-sera run and tabulate its IC50s.
# NOTE(review): this rebinds `neut_data_adults`, `fits_adults` and
# `ic50s_adults` from the earlier adult cell; the table built here has no
# `age_group` column and `serum` is not cast to str, so re-running the
# concat cell after this one would give a different `ic50s_full`.
neut_data_adults = pd.read_csv('neut_data/230418_adult-sera-final_tidy.csv')
fits_adults = neutcurve.CurveFits(neut_data_adults)

# reciprocal titer = 1 / IC50, computed column-wise instead of row by row
ic50s_adults = fits_adults.fitParams(ics=[50]).assign(
    reciprocal=lambda df: 1 / df['ic50']
)

# Display sorted by IC50 (most potent first) in scientific notation.
ic50s_adults.sort_values(['ic50']).style.format({'ic50': "{:.2E}"})

Unnamed: 0,serum,virus,replicate,nreplicates,ic50,ic50_bound,ic50_str,midpoint,slope,top,bottom,reciprocal
1,215C,HK19,average,2,0.000198,interpolated,0.000198,0.000198,1.540072,1,0,5040.753176
0,34C,HK19,average,2,0.000279,interpolated,0.000279,0.000279,2.135661,1,0,3584.840814
2,33C,HK19,average,2,0.00131,interpolated,0.00131,0.001307,1.973236,1,0,765.191226
3,197C,HK19,average,2,0.0033,interpolated,0.0033,0.003295,2.032573,1,0,303.480843
4,199C,HK19,average,2,0.00425,interpolated,0.00425,0.004247,2.394697,1,0,235.485471


In [None]:
# Re-fit the teen sera. The 230303 file is the teen run, so it must be bound
# to `neut_data_teens`: binding it to `neut_data_adults` (as before) overwrote
# the adult data and left the fit below running on whatever stale
# `neut_data_teens` happened to be in the kernel.
neut_data_teens = pd.read_csv('neut_data/230303_neuts_tidy.csv')
fits_teens = neutcurve.CurveFits(neut_data_teens)

ic50s_teen = (
    fits_teens.fitParams(ics=[50])
    # reciprocal titer = 1 / IC50, computed column-wise instead of row by row
    .assign(reciprocal=lambda df: 1 / df['ic50'])
    .sort_values('reciprocal')
    .reset_index(drop=True)
    # serum IDs become strings so they act as labels; tag the cohort
    .assign(serum=lambda df: df['serum'].astype(str), age_group='15-18')
)

In [7]:
# Persist the combined IC50 table. The row index is a leftover of the
# per-cohort concat (non-unique, carries no information), so it is not
# written; otherwise it reads back as a stray 'Unnamed: 0' column.
ic50s_full.to_csv('full_ic50s.csv', index=False)

In [2]:
# Reload the combined IC50 table. `serum` is pinned to str so the IDs stay
# labels (as set with .astype(str) when the table was built) instead of
# depending on read_csv's type inference.
ic50s_full = pd.read_csv('full_ic50s.csv', dtype={'serum': str})

In [4]:
# Reciprocal IC50 per serum (x ordered by titer) on a log10 y-axis,
# with one panel per age group and an independent x-axis in each panel.
p = ggplot(data=ic50s_full, mapping=aes(x='reorder(serum, reciprocal)', y='reciprocal'))
p = p + geom_point()
p = p + theme(figure_size=(12, 4), panel_grid_major_x=element_blank())
p = p + scale_y_log10()
# no `fill` aesthetic is mapped above, so this scale has nothing to act on
p = p + scale_fill_manual(values=CBPALETTE[1:])
p = p + labs(
    x='serum',
    y='reciprocal IC50',
    title='HK/19 neutralizing titers by age group',
)
p = p + facet_wrap('~ age_group', nrow=1, scales='free_x')

_ = p.draw()

# Write the figure to disk.
p.save('figures/230309_ped-ic50-summary_test.pdf')

In [None]:
# Histogram of 'no-antibody_count' with bars dodged and colored by library.
# NOTE(review): `no_ab_220606_threshold` is not defined in any cell visible in
# this notebook — this cell looks carried over from another analysis and will
# raise NameError on a fresh kernel unless that frame is created first.
p = ggplot(no_ab_220606_threshold, aes(x='no-antibody_count', fill='library'))
p = p + geom_histogram(position=position_dodge(), binwidth=0.5)
p = p + theme(figure_size=(10, 5), panel_grid_major_x=element_blank())
p = p + scale_fill_manual(values=CBPALETTE[1:])
p = p + labs(
    x='barcode counts',
    y='number variants',
    title='distribution of barcode counts for variants below threshold in 220606 no-Ab infection',
)

_ = p.draw()

In [None]:
# Load the tidy neut data from the 230208 PBS run (the same file that is read
# into `rough_run1` earlier in this notebook).
neut_data_pbs = pd.read_csv('neut_data/230208_neuts_tidy_PBS.csv')

# Disabled: cast of the serum column to object dtype.
# neut_data_pbs['serum'] =neut_data_pbs['serum'].astype(object)

In [None]:
# Load the tidy neut data from the 230210 run, unfiltered (the earlier
# `rough_run2` cell drops the 1C04 rows from this same file; this one does not).
neut_data_run2 = pd.read_csv('neut_data/230210_neuts_tidy.csv')

# fit with neutcurve package
# NOTE(review): this rebinds `fits`, which an earlier cell sets to the fits of
# the consolidated <5yo data — later use of `fits` depends on execution order.
fits = neutcurve.CurveFits(neut_data_run2)

In [None]:
# Load the tidy neut data from the 230217 run (not used by any cell visible here).
neut_data_run3 = pd.read_csv('neut_data/230217_neuts_tidy.csv')

In [None]:
# Load the tidy neut data from the 230223 run (not used by any cell visible here).
neut_data_run4 = pd.read_csv('neut_data/230223_neuts_tidy.csv')