Skip to content

Commit

Permalink
Merge 1e809f8 into ef7747b
Browse files Browse the repository at this point in the history
  • Loading branch information
julia326 committed Jan 30, 2018
2 parents ef7747b + 1e809f8 commit d4e4cdd
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 32 deletions.
6 changes: 3 additions & 3 deletions test/test_mutant_protein_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand Down Expand Up @@ -88,7 +88,7 @@ def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand All @@ -114,7 +114,7 @@ def test_keep_top_k_epitopes():
reads_generator = allele_reads_generator_from_args(args)

keep_k_epitopes = 3
ranked_list = ranked_vaccine_peptides(
ranked_list, _ = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=RandomBindingPredictor(["H-2-Kb", "H-2-Db"]),
vaccine_peptide_length=15,
Expand Down
8 changes: 4 additions & 4 deletions vaxrank/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def ranked_variant_list_with_metadata(args):
reads_generator = allele_reads_generator_from_args(args)
mhc_predictor = mhc_binding_predictor_from_args(args)

ranked_list = ranked_vaccine_peptides(
ranked_list, variants_count_dict = ranked_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=args.vaccine_peptide_length,
Expand All @@ -282,15 +282,15 @@ def ranked_variant_list_with_metadata(args):

ranked_list_for_report = ranked_list[:args.max_mutations_in_report]

num_coding_effect_variants = len(
variants.effects().drop_silent_and_noncoding().groupby_variant())
patient_info = PatientInfo(
patient_id=args.output_patient_id,
vcf_paths=variants.sources,
bam_path=args.bam,
mhc_alleles=mhc_alleles,
num_somatic_variants=len(variants),
num_coding_effect_variants=num_coding_effect_variants,
num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
)

# return variants, patient info, and command-line args
Expand Down
55 changes: 40 additions & 15 deletions vaxrank/core_logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from __future__ import absolute_import, print_function, division
from collections import defaultdict
import logging

from numpy import isclose
Expand All @@ -31,7 +32,7 @@

def vaccine_peptides_for_variant(
variant,
isovar_protein_sequences,
isovar_protein_sequence,
mhc_predictor,
vaccine_peptide_length,
padding_around_mutation,
Expand All @@ -41,14 +42,9 @@ def vaccine_peptides_for_variant(
"""
Returns sorted list of VaccinePeptide objects.
"""
isovar_protein_sequences = list(isovar_protein_sequences)
if len(isovar_protein_sequences) == 0:
logger.info("No protein sequences for %s", variant)
return []

protein_fragment = MutantProteinFragment.from_isovar_protein_sequence(
variant=variant,
protein_sequence=isovar_protein_sequences[0])
protein_sequence=isovar_protein_sequence)

logger.info(
"Mutant protein fragment for %s: %s",
Expand Down Expand Up @@ -95,7 +91,7 @@ def vaccine_peptides_for_variant(
n_total_candidates = len(candidate_vaccine_peptides)
if n_total_candidates == 0:
logger.info("No candidate peptides for variant %s", variant.short_description)
return candidate_vaccine_peptides
return []

max_score = max(vp.combined_score for vp in candidate_vaccine_peptides)
if isclose(max_score, 0.0):
Expand Down Expand Up @@ -138,7 +134,9 @@ def generate_vaccine_peptides(
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
"""
Returns dictionary mapping each variant to list of VaccinePeptide objects.
Returns a tuple of two values:
- dictionary mapping each variant to list of VaccinePeptide objects
- dictionary containing some variant counts for report display
"""

# total number of amino acids is the vaccine peptide length plus the
Expand All @@ -156,18 +154,43 @@ def generate_vaccine_peptides(
max_protein_sequences_per_variant=1)

result_dict = {}
counts_dict = defaultdict(int)
for variant, isovar_protein_sequences in protein_sequences_generator:
if len(variant.effects().drop_silent_and_noncoding()) > 0:
counts_dict['num_coding_effect_variants'] += 1
isovar_protein_sequences = list(isovar_protein_sequences)
if len(isovar_protein_sequences) == 0:
# this means the variant RNA support is below threshold
logger.info("No protein sequences for %s", variant)
continue

# use the first protein sequence - why?
counts_dict['num_variants_with_rna_support'] += 1
isovar_protein_sequence = isovar_protein_sequences[0]
vaccine_peptides = vaccine_peptides_for_variant(
variant=variant,
isovar_protein_sequences=isovar_protein_sequences,
isovar_protein_sequence=isovar_protein_sequence,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=vaccine_peptide_length,
padding_around_mutation=padding_around_mutation,
max_vaccine_peptides_per_variant=max_vaccine_peptides_per_variant,
num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep,
min_epitope_score=min_epitope_score)

# do any of this variant's vaccine peptides contain mutant epitopes?
any_mutant_epitopes = False
for vaccine_peptide in vaccine_peptides:
if vaccine_peptide.contains_epitopes():
any_mutant_epitopes = True
break
if any_mutant_epitopes:
counts_dict['num_variants_with_vaccine_peptides'] += 1
result_dict[variant] = vaccine_peptides
return result_dict

for key, value in counts_dict.items():
logger.info('%s: %d', key, value)

return result_dict, counts_dict

def ranked_vaccine_peptides(
reads_generator,
Expand All @@ -181,10 +204,12 @@ def ranked_vaccine_peptides(
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
"""
Returns sorted list whose first element is a Variant and whose second
element is a list of VaccinePeptide objects.
Returns a tuple of two values:
- sorted list whose first element is a Variant and whose second
element is a list of VaccinePeptide objects
- dictionary containing some variant counts for report display
"""
variants_to_vaccine_peptides_dict = generate_vaccine_peptides(
variants_to_vaccine_peptides_dict, variant_counts_dict = generate_vaccine_peptides(
reads_generator=reads_generator,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=vaccine_peptide_length,
Expand All @@ -199,4 +224,4 @@ def ranked_vaccine_peptides(
result_list.sort(
key=lambda x: x[1][0].combined_score if len(x[1]) > 0 else 0.0,
reverse=True)
return result_list
return result_list, variant_counts_dict
17 changes: 9 additions & 8 deletions vaxrank/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
"mhc_alleles",
"num_somatic_variants",
"num_coding_effect_variants",
"num_variants_with_rna_support",
"num_variants_with_vaccine_peptides",
))


Expand Down Expand Up @@ -106,6 +108,11 @@ def _patient_info(self):
('Total number of somatic variants', self.patient_info.num_somatic_variants),
('Somatic variants with predicted coding effects',
self.patient_info.num_coding_effect_variants),
('Somatic variants with predicted coding effects and RNA support',
self.patient_info.num_variants_with_rna_support),
('Somatic variants with predicted coding effects, RNA support and predicted MHC '
'expression',
self.patient_info.num_variants_with_vaccine_peptides),
])
return patient_info

Expand Down Expand Up @@ -303,7 +310,7 @@ def compute_template_data(self):

peptides = []
for j, vaccine_peptide in enumerate(vaccine_peptides):
if not peptide_contains_epitopes(vaccine_peptide):
if not vaccine_peptide.contains_epitopes():
logger.info('No epitopes for peptide: %s', vaccine_peptide)
continue

Expand Down Expand Up @@ -459,12 +466,6 @@ def resize_columns(worksheet, amino_acids_col, pos_col):
worksheet.set_column('%s:%s' % (amino_acids_col, amino_acids_col), 40)
worksheet.set_column('%s:%s' % (pos_col, pos_col), 12)


def peptide_contains_epitopes(vaccine_peptide):
"""Returns true if vaccine peptide contains mutant epitopes."""
return len(vaccine_peptide.mutant_epitope_predictions) > 0


def make_minimal_neoepitope_report(
ranked_variants_with_vaccine_peptides,
num_epitopes_per_peptide=None,
Expand Down Expand Up @@ -545,7 +546,7 @@ def make_csv_report(
for j, vaccine_peptide in enumerate(vaccine_peptides):

# if there are no predicted epitopes, exclude this peptide from the report
if not peptide_contains_epitopes(vaccine_peptide):
if not vaccine_peptide.contains_epitopes():
logger.info('No epitopes for peptide: %s', vaccine_peptide)
continue

Expand Down
7 changes: 5 additions & 2 deletions vaxrank/vaccine_peptide.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ def __new__(
cls,
mutant_protein_fragment,
epitope_predictions,
num_mutant_epitopes_to_keep=10000,
min_epitope_score=0):
num_mutant_epitopes_to_keep=10000):
# only keep the top k epitopes
mutant_epitope_predictions = sorted([
p for p in epitope_predictions if p.overlaps_mutation and not p.occurs_in_reference
Expand Down Expand Up @@ -178,6 +177,10 @@ def lexicographic_sort_key(self):
extra_score_tuple
)

def contains_epitopes(self):
"""Returns true if this vaccine peptide contains mutant epitopes."""
return len(self.mutant_epitope_predictions) > 0

@property
def expression_score(self):
return np.sqrt(self.mutant_protein_fragment.n_alt_reads)
Expand Down

0 comments on commit d4e4cdd

Please sign in to comment.