Creating DMSPhenotype and Fitness objects

In [1]:
from gcdyn.fitness import Fitness
from gcdyn.replay import DMSPhenotype
from Bio import SeqIO

In [2]:
# Phenotype model used to obtain KD values for sequences (via `calculate_KD` below).
# NOTE(review): the three leading positional values (1, 1, 336) and the trailing
# -10.43 are passed through unchanged; their meaning is defined by the
# DMSPhenotype signature in gcdyn.replay -- confirm there before editing.
replay_phenotype = DMSPhenotype(
    1,
    1,
    336,
    "https://raw.githubusercontent.com/jbloomlab/Ab-CGGnaive_DMS/main/data/CGGnaive_sites.csv",
    "Linear.model",
    ["delta_log10_KD", "expression"],
    -10.43,
)

# Read every FASTA record as a plain string, skipping the record whose id is "naive".
fasta_path = "gcreplay_samples/gctree_PR1.3-1-LB-16-GC.fasta"
seqs = [
    str(record.seq)
    for record in SeqIO.parse(fasta_path, "fasta")
    if record.id != "naive"
]

# Sigmoidal fitness model; the resulting frame is reused by later cells
# (they read its "KD", "t_cell_help" and "normalized_t_cell_help" columns).
fit = Fitness(Fitness.sigmoidal_fitness)
fitness_df = fit.normalized_fitness_df(
    seqs, calculate_KD=replay_phenotype.calculate_KD
)

Testing the fraction of antigen bound at different antigen concentrations

In [None]:
import pandas as pd
from plotnine import ggplot, geom_histogram, aes, facet_wrap, ggtitle, xlim, ylim

# Parallel accumulators for the concentration sweep below: entry i of
# `concentrations` is the antigen concentration at which entry i of
# `antigen_bound_fracs` was computed.
antigen_bound_fracs, concentrations = [], []


def frac_antigen_bound(
    sequence_KDs: list[float], concentration_antigen: float
) -> list[float]:
    """Compute the fraction of antigen bound for each KD value.

    Each output value is ``concentration_antigen / (KD + concentration_antigen)``.

    Args:
        sequence_KDs: KD values, one per sequence. Any iterable of numbers
            works; the notebook passes a DataFrame column.
        concentration_antigen: Antigen concentration. This is a float
            (the notebook passes values such as ``10**-9``), not an int.
            NOTE(review): presumably in the same units as the KD values --
            confirm against the phenotype model.

    Returns:
        A list of bound fractions, in the same order as ``sequence_KDs``.
    """
    return [
        concentration_antigen / (seq_KD + concentration_antigen)
        for seq_KD in sequence_KDs
    ]


# Sweep antigen concentrations 10^-6 ... 10^-10 and record, for every sequence
# KD, the fraction of antigen bound at that concentration.
# No Fitness object is built inside the loop: the bound fraction depends only
# on KD and the concentration, and rebinding `fit` here would silently replace
# the sigmoidal Fitness instance created in the earlier cell.
for c in range(6, 11):
    concentration_antigen = 10 ** (-1 * c)
    antigen_bound_fracs.extend(
        frac_antigen_bound(fitness_df["KD"], concentration_antigen)
    )
    # Repeat the concentration once per sequence so both lists stay aligned.
    concentrations.extend([concentration_antigen] * len(fitness_df["KD"]))

# Long-format table: one row per (concentration, sequence) pair.
antigen_bound_df = pd.DataFrame(
    {"concentration_antigen": concentrations, "frac_antigen_bound": antigen_bound_fracs}
)

# One histogram panel per antigen concentration.
(
    ggplot(antigen_bound_df)
    + facet_wrap(facets="~concentration_antigen")
    + aes(x="frac_antigen_bound", color="factor(concentration_antigen)")
    + geom_histogram(binwidth=0.05)
    + xlim(0, 1.05)
)

Mapping to fitness (sigmoidal)

In [None]:
from plotnine import geom_point

# Add the bound fraction at an antigen concentration of 10**-9 as a new column
# of the sigmoidal-fitness frame, then plot T cell help against it.
fitness_df["frac_antigen_bound"] = frac_antigen_bound(
    fitness_df["KD"], 10**-9
)
(
    ggplot(fitness_df, aes(x="frac_antigen_bound", y="t_cell_help"))
    + geom_point()
)

Normalized fitness (T cell help)

In [None]:
# Scatter of normalized T cell help against the "frac_antigen_bound" column
# of the sigmoidal-fitness frame.
(
    ggplot(
        fitness_df,
        aes(x="frac_antigen_bound", y="normalized_t_cell_help"),
    )
    + geom_point()
)

Mapping to fitness (linear)

In [None]:
# Repeat the fitness computation with the linear fitness function.
linfit = Fitness(Fitness.linear_fitness)
linfit_fitness_df = linfit.normalized_fitness_df(
    seqs,
    calculate_KD=replay_phenotype.calculate_KD,
)

# Bound fraction at an antigen concentration of 10**-9, stored as a new column.
linfit_fitness_df["frac_antigen_bound"] = frac_antigen_bound(
    linfit_fitness_df["KD"],
    10**-9,
)
(
    ggplot(linfit_fitness_df, aes(x="frac_antigen_bound", y="t_cell_help"))
    + geom_point()
)

In [None]:
# Convert normalized T cell help into a number of cell divisions using the
# linear mapping provided by the Fitness object.
# NOTE(review): the second positional argument (200) is passed through as-is;
# its meaning is defined by `cell_divisions_from_tfh_linear` -- confirm there.
test_cell_divs = linfit.cell_divisions_from_tfh_linear(
    linfit_fitness_df["normalized_t_cell_help"],
    200,
)
linfit_fitness_df["cell_divs"] = test_cell_divs
(
    ggplot(linfit_fitness_df, aes(x="frac_antigen_bound", y="cell_divs"))
    + geom_point()
)

Uniform fitness

In [None]:
# Repeat the fitness computation with the uniform fitness function.
uniform_fit = Fitness(Fitness.uniform_fitness)
uniform_fitness_df = uniform_fit.normalized_fitness_df(
    seqs,
    calculate_KD=replay_phenotype.calculate_KD,
)

# Bound fraction at an antigen concentration of 10**-9, stored as a new column.
uniform_fitness_df["frac_antigen_bound"] = frac_antigen_bound(
    uniform_fitness_df["KD"],
    10**-9,
)
(
    ggplot(
        uniform_fitness_df,
        aes(x="frac_antigen_bound", y="normalized_t_cell_help"),
    )
    + geom_point()
)