Skip to content

Commit

Permalink
Merge pull request #152 from openvax/add-counts
Browse files Browse the repository at this point in the history
Computing some counts for the vaxrank report in the core logic
  • Loading branch information
julia326 authored Feb 9, 2018
2 parents e3ff0c3 + 28181a7 commit e1ead87
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 49 deletions.
6 changes: 3 additions & 3 deletions test/test_mutant_protein_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand All @@ -114,7 +114,7 @@ def test_keep_top_k_epitopes():
reads_generator = allele_reads_generator_from_args(args)

keep_k_epitopes = 3
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand Down
10 changes: 5 additions & 5 deletions vaxrank/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ def add_vaccine_peptide_args(arg_parser):

vaccine_peptide_group.add_argument(
"--min-epitope-score",
default=0.001,
default=1e-10,
type=float,
help="Ignore epitopes whose normalized score falls below this threshold")

Expand Down Expand Up @@ -268,7 +268,7 @@ def ranked_variant_list_with_metadata(args):
reads_generator = allele_reads_generator_from_args(args)
mhc_predictor = mhc_binding_predictor_from_args(args)

ranked_list = ranked_vaccine_peptides(
ranked_list, variants_count_dict = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=args.vaccine_peptide_length,
Expand All @@ -282,15 +282,15 @@ def ranked_variant_list_with_metadata(args):

ranked_list_for_report = ranked_list[:args.max_mutations_in_report]

num_coding_effect_variants = len(
variants.effects().drop_silent_and_noncoding().groupby_variant())
patient_info = PatientInfo(
patient_id=args.output_patient_id,
vcf_paths=variants.sources,
bam_path=args.bam,
mhc_alleles=mhc_alleles,
num_somatic_variants=len(variants),
num_coding_effect_variants=num_coding_effect_variants,
num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
)

# return variants, patient info, and command-line args
Expand Down
57 changes: 42 additions & 15 deletions vaxrank/core_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from __future__ import absolute_import, print_function, division
from collections import defaultdict
import logging

from numpy import isclose
Expand All @@ -31,7 +32,7 @@

def vaccine_peptides_for_variant(
variant,
isovar_protein_sequences,
isovar_protein_sequence,
mhc_predictor,
vaccine_peptide_length,
padding_around_mutation,
Expand All @@ -41,14 +42,9 @@ def vaccine_peptides_for_variant(
"""
Returns sorted list of VaccinePeptide objects.
"""
isovar_protein_sequences = list(isovar_protein_sequences)
if len(isovar_protein_sequences) == 0:
logger.info("No protein sequences for %s", variant)
return []

protein_fragment = MutantProteinFragment.from_isovar_protein_sequence(
variant=variant,
protein_sequence=isovar_protein_sequences[0])
protein_sequence=isovar_protein_sequence)

logger.info(
"Mutant protein fragment for %s: %s",
Expand Down Expand Up @@ -95,7 +91,7 @@ def vaccine_peptides_for_variant(
n_total_candidates = len(candidate_vaccine_peptides)
if n_total_candidates == 0:
logger.info("No candidate peptides for variant %s", variant.short_description)
return candidate_vaccine_peptides
return []

max_score = max(vp.combined_score for vp in candidate_vaccine_peptides)
if isclose(max_score, 0.0):
Expand Down Expand Up @@ -138,7 +134,9 @@ def generate_vaccine_peptides(
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
"""
Returns dictionary mapping each variant to list of VaccinePeptide objects.
Returns a tuple of two values:
- dictionary mapping each variant to list of VaccinePeptide objects
- dictionary containing some variant counts for report display
"""

# total number of amino acids is the vaccine peptide length plus the
Expand All @@ -156,18 +154,43 @@ def generate_vaccine_peptides(
max_protein_sequences_per_variant=1)

result_dict = {}
counts_dict = defaultdict(int)
for variant, isovar_protein_sequences in protein_sequences_generator:
if len(variant.effects().drop_silent_and_noncoding()) > 0:
counts_dict['num_coding_effect_variants'] += 1
isovar_protein_sequences = list(isovar_protein_sequences)
if len(isovar_protein_sequences) == 0:
# this means the variant RNA support is below threshold
logger.info("No protein sequences for %s", variant)
continue

# use the first protein sequence - why?
counts_dict['num_variants_with_rna_support'] += 1
isovar_protein_sequence = isovar_protein_sequences[0]
vaccine_peptides = vaccine_peptides_for_variant(
variant=variant,
isovar_protein_sequences=isovar_protein_sequences,
isovar_protein_sequence=isovar_protein_sequence,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=vaccine_peptide_length,
padding_around_mutation=padding_around_mutation,
max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant,
num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
min_epitope_score=min_epitope_score)

# do any of this variant's vaccine peptides contain mutant epitopes?
any_mutant_epitopes = False
for vaccine_peptide in vaccine_peptides:
if vaccine_peptide.contains_mutant_epitopes():
any_mutant_epitopes = True
break
if any_mutant_epitopes:
counts_dict['num_variants_with_vaccine_peptides'] += 1
result_dict[variant] = vaccine_peptides
return result_dict

for key, value in counts_dict.items():
logger.info('%s: %d', key, value)

return result_dict, counts_dict

def ranked_vaccine_peptides(
reads_generator,
Expand All @@ -181,10 +204,13 @@ def ranked_vaccine_peptides(
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
"""
Returns sorted list whose first element is a Variant and whose second
element is a list of VaccinePeptide objects.
Returns a tuple of two values:
- sorted list whose first element is a Variant and whose second
element is a list of VaccinePeptide objects
- dictionary containing "funnel" variant counts for report display. Keys are e.g.
"num_variants_with_rna_support", values are integer counts.
"""
variants_to_vaccine_peptides_dict = generate_vaccine_peptides(
variants_to_vaccine_peptides_dict, variant_counts_dict = generate_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=vaccine_peptide_length,
Expand All @@ -196,7 +222,8 @@ def ranked_vaccine_peptides(
num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
min_epitope_score=min_epitope_score)
result_list = list(variants_to_vaccine_peptides_dict.items())
# TODO: move this sort key into its own function, also make less nuts
result_list.sort(
key=lambda x: x[1][0].combined_score if len(x[1]) > 0 else 0.0,
reverse=True)
return result_list
return result_list, variant_counts_dict
2 changes: 1 addition & 1 deletion vaxrank/epitope_prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def logistic_epitope_score(
self,
midpoint=350.0,
width=150.0,
ic50_cutoff=2000.0):
ic50_cutoff=5000.0): # TODO: add these default values into CLI as arguments
"""
Map from IC50 values to score where 1.0 = strong binder, 0.0 = weak binder
Default midpoint and width for logistic determined by max likelihood fit
Expand Down
46 changes: 23 additions & 23 deletions vaxrank/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"mhc_alleles",
"num_somatic_variants",
"num_coding_effect_variants",
"num_variants_with_rna_support",
"num_variants_with_vaccine_peptides",
))


Expand Down Expand Up @@ -106,6 +108,11 @@ def _patient_info(self):
('Total number of somatic variants', self.patient_info.num_somatic_variants),
('Somatic variants with predicted coding effects',
self.patient_info.num_coding_effect_variants),
('Somatic variants with predicted coding effects and RNA support',
self.patient_info.num_variants_with_rna_support),
('Somatic variants with predicted coding effects, RNA support and predicted MHC '
'ligands',
self.patient_info.num_variants_with_vaccine_peptides),
])
return patient_info

Expand All @@ -115,13 +122,13 @@ def _variant_data(self, top_vaccine_peptide):
"""
variant_data = OrderedDict()
mutant_protein_fragment = top_vaccine_peptide.mutant_protein_fragment
top_score = round(top_vaccine_peptide.combined_score, 4)
top_score = _sanitize(top_vaccine_peptide.combined_score)
variant_data = OrderedDict([
('Gene name', mutant_protein_fragment.gene_name),
('Top score', top_score),
('Reads supporting variant allele', mutant_protein_fragment.n_alt_reads),
('Reads supporting reference allele', mutant_protein_fragment.n_ref_reads),
('Reads supporting other alleles', mutant_protein_fragment.n_other_reads),
('RNA reads supporting variant allele', mutant_protein_fragment.n_alt_reads),
('RNA reads supporting reference allele', mutant_protein_fragment.n_ref_reads),
('RNA reads supporting other alleles', mutant_protein_fragment.n_other_reads),
])
return variant_data

Expand Down Expand Up @@ -182,9 +189,9 @@ def _peptide_data(self, vaccine_peptide, transcript_name):
peptide_data = OrderedDict([
('Transcript name', transcript_name),
('Length', len(amino_acids)),
('Expression score', round(vaccine_peptide.expression_score, 4)),
('Mutant epitope score', round(vaccine_peptide.mutant_epitope_score, 4)),
('Combined score', round(vaccine_peptide.combined_score, 4)),
('Expression score', _sanitize(vaccine_peptide.expression_score)),
('Mutant epitope score', _sanitize(vaccine_peptide.mutant_epitope_score)),
('Combined score', _sanitize(vaccine_peptide.combined_score)),
('Max coding sequence coverage',
mutant_protein_fragment.n_alt_reads_supporting_protein_sequence),
('Mutant amino acids', mutant_protein_fragment.n_mutant_amino_acids),
Expand All @@ -199,8 +206,8 @@ def _manufacturability_data(self, vaccine_peptide):
"""
scores = vaccine_peptide.manufacturability_scores
manufacturability_data = OrderedDict([
('C-terminal 7mer GRAVY score', round(scores.cterm_7mer_gravy_score, 4)),
('Max 7mer GRAVY score', round(scores.max_7mer_gravy_score, 4)),
('C-terminal 7mer GRAVY score', _sanitize(scores.cterm_7mer_gravy_score)),
('Max 7mer GRAVY score', _sanitize(scores.max_7mer_gravy_score)),
('N-terminal Glutamine, Glutamic Acid, or Cysteine',
int(scores.difficult_n_terminal_residue)),
('C-terminal Cysteine', int(scores.c_terminal_cysteine)),
Expand All @@ -223,8 +230,7 @@ def _epitope_data(self, epitope_prediction):
epitope_data = OrderedDict([
('Sequence', epitope_prediction.peptide_sequence),
('IC50', '%.2f nM' % epitope_prediction.ic50),
('Score', round(
epitope_prediction.logistic_epitope_score(), 4)),
('Score', _sanitize(epitope_prediction.logistic_epitope_score())),
('Allele', epitope_prediction.allele.replace('HLA-', '')),
('WT sequence', epitope_prediction.wt_peptide_sequence),
('WT IC50', wt_ic50_str),
Expand Down Expand Up @@ -303,7 +309,7 @@ def compute_template_data(self):

peptides = []
for j, vaccine_peptide in enumerate(vaccine_peptides):
if not peptide_contains_epitopes(vaccine_peptide):
if not vaccine_peptide.contains_mutant_epitopes():
logger.info('No epitopes for peptide: %s', vaccine_peptide)
continue

Expand Down Expand Up @@ -448,7 +454,7 @@ def _sanitize(val):
if type(val) == bool:
val = int(val)
elif type(val) == float:
val = round(val, 4)
val = round(val, 10)
return val

def resize_columns(worksheet, amino_acids_col, pos_col):
Expand All @@ -459,12 +465,6 @@ def resize_columns(worksheet, amino_acids_col, pos_col):
worksheet.set_column('%s:%s' % (amino_acids_col, amino_acids_col), 40)
worksheet.set_column('%s:%s' % (pos_col, pos_col), 12)


def peptide_contains_epitopes(vaccine_peptide):
"""Returns true if vaccine peptide contains mutant epitopes."""
return len(vaccine_peptide.mutant_epitope_predictions) > 0


def make_minimal_neoepitope_report(
ranked_variants_with_vaccine_peptides,
num_epitopes_per_peptide=None,
Expand Down Expand Up @@ -545,7 +545,7 @@ def make_csv_report(
for j, vaccine_peptide in enumerate(vaccine_peptides):

# if there are no predicted epitopes, exclude this peptide from the report
if not peptide_contains_epitopes(vaccine_peptide):
if not vaccine_peptide.contains_mutant_epitopes():
logger.info('No epitopes for peptide: %s', vaccine_peptide)
continue

Expand All @@ -560,9 +560,9 @@ def make_csv_report(
vaccine_peptide.mutant_protein_fragment.mutant_amino_acid_start_offset)
columns["mutation_end"].append(
vaccine_peptide.mutant_protein_fragment.mutant_amino_acid_end_offset)
columns["combined_score"].append(round(vaccine_peptide.combined_score, 4))
columns["expression_score"].append(round(vaccine_peptide.expression_score, 4))
columns["mutant_epitope_score"].append(round(vaccine_peptide.mutant_epitope_score, 4))
columns["combined_score"].append(_sanitize(vaccine_peptide.combined_score))
columns["expression_score"].append(_sanitize(vaccine_peptide.expression_score))
columns["mutant_epitope_score"].append(_sanitize(vaccine_peptide.mutant_epitope_score))
for field in ManufacturabilityScores._fields:
columns[field].append(
_sanitize(getattr(vaccine_peptide.manufacturability_scores, field)))
Expand Down
6 changes: 4 additions & 2 deletions vaxrank/vaccine_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ def __new__(
cls,
mutant_protein_fragment,
epitope_predictions,
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
num_mutant_epitopes_to_keep=10000):
# only keep the top k epitopes
mutant_epitope_predictions = sorted([
p for p in epitope_predictions if p.overlaps_mutation and not p.occurs_in_reference
Expand Down Expand Up @@ -178,6 +177,9 @@ def lexicographic_sort_key(self):
extra_score_tuple
)

def contains_mutant_epitopes(self):
return len(self.mutant_epitope_predictions) > 0

@property
def expression_score(self):
return np.sqrt(self.mutant_protein_fragment.n_alt_reads)
Expand Down

0 comments on commit e1ead87

Please sign in to comment.