Skip to content

Commit

Permalink
PEP8 and style changes, made GenePathwayCheck easier to read
Browse files Browse the repository at this point in the history
  • Loading branch information
iskandr committed Oct 10, 2018
1 parent 47decce commit 9334465
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 123 deletions.
6 changes: 4 additions & 2 deletions vaxrank/cli.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2016. Mount Sinai School of Medicine
# Copyright (c) 2016-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -216,7 +216,9 @@ def add_vaccine_peptide_args(arg_parser):
"--min-epitope-score",
default=1e-10,
type=float,
help="Ignore epitopes whose normalized score falls below this threshold")
help=(
"Ignore predicted MHC ligands whose normalized binding score "
"falls below this threshold"))


def add_supplemental_report_args(arg_parser):
Expand Down
51 changes: 26 additions & 25 deletions vaxrank/core_logic.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2016. Mount Sinai School of Medicine
# Copyright (c) 2016-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -13,11 +13,10 @@
# limitations under the License.

from __future__ import absolute_import, print_function, division
from collections import defaultdict, namedtuple, OrderedDict
from collections import OrderedDict
import logging

from numpy import isclose, asarray
import pandas as pd
from numpy import isclose

from isovar.protein_sequences import (
reads_generator_to_protein_sequences_generator,
Expand All @@ -26,12 +25,11 @@
from .mutant_protein_fragment import MutantProteinFragment
from .epitope_prediction import predict_epitopes, slice_epitope_predictions
from .vaccine_peptide import VaccinePeptide
from .gene_pathway_check import GenePathwayCheck


logger = logging.getLogger(__name__)

class VaxrankCoreLogic:

class VaxrankCoreLogic(object):
def __init__(
self,
variants,
Expand All @@ -47,50 +45,53 @@ def __init__(
min_epitope_score=0.0,
gene_pathway_check=None):
"""
Construct a VaxrankCoreLogic object.
Parameters
----------
variants : VariantCollection
Variant objects to evaluate for vaccine inclusion
reads_generator : generator
Yields sequence of varcode.Variant objects paired with sequences of AlleleRead objects
that support that variant.
Yields sequence of varcode.Variant objects paired with sequences of
AlleleRead objects that support that variant.
mhc_predictor : mhctools.BasePredictor
Object with predict_peptides method, used for making pMHC binding predictions
Object with predict_peptides method, used for making pMHC binding
predictions
vaccine_peptide_length : int
Length of vaccine SLP to construct
padding_around_mutation : int
Number of off-center windows around the mutation to consider as vaccine peptides.
Number of off-center windows around the mutation to consider as vaccine
peptides.
max_vaccine_peptides_per_variant : int
Number of vaccine peptides to generate for each mutation.
min_alt_rna_reads : int
Drop variant sequences at loci with fewer than this number of reads supporting the alt
allele.
Drop variant sequences at loci with fewer than this number of reads
supporting the alt allele.
min_variant_sequence_coverage : int
Trim variant sequences to positions supported by at least this number of RNA reads.
Trim variant sequences to positions supported by at least this number
of RNA reads.
variant_sequence_assembly : int
If True, then assemble variant cDNA sequences based on overlap of RNA reads. If False,
then variant cDNA sequences must be fully spanned and contained within RNA reads.
If True, then assemble variant cDNA sequences based on overlap of RNA
reads. If False, then variant cDNA sequences must be fully spanned and
contained within RNA reads.
num_mutant_epitopes_to_keep : int, optional
Number of top-ranking epitopes for each vaccine peptide to include in computation.
Number of top-ranking epitopes for each vaccine peptide to include in
computation.
min_epitope_score : float, optional
Ignore peptides with binding predictions whose normalized score is less than this.
gene_pathway_check : GenePathwayCheck object, optional
If provided, will check against known pathways/gene sets/variant sets and include the
info in the all-variants output file.
Ignore peptides with binding predictions whose normalized score is less
than this.
gene_pathway_check : GenePathwayCheck, optional
If provided, will check against known pathways/gene sets/variant sets and
include the info in the all-variants output file.
"""
self.variants = variants
self.reads_generator = reads_generator
Expand Down Expand Up @@ -272,7 +273,7 @@ def ranked_vaccine_peptides(
"""
This function returns a sorted list whose first element is a Variant and whose second
element is a list of VaccinePeptide objects.
"""
"""
variant_peptides_dict = self.vaccine_peptides
result_list = list(variant_peptides_dict.items())
# TODO: move this sort key into its own function, also make less nuts
Expand Down
45 changes: 27 additions & 18 deletions vaxrank/epitope_prediction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2016. Mount Sinai School of Medicine
# Copyright (c) 2016-2018. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -72,6 +72,7 @@ def logistic_epitope_score(

return logistic / normalizer


def fm_index_path(genome):
"""
Returns a path for cached reference peptides, for the given genome.
Expand All @@ -84,6 +85,7 @@ def fm_index_path(genome):
return os.path.join(cache_dir, '%s_%d_%d.fm' % (
genome.species.latin_name, genome.release, 2 if six.PY2 else 3))


def index_contains_kmer(fm, kmer):
"""
Checks FM index for kmer, returns true if found.
Expand All @@ -94,6 +96,7 @@ def index_contains_kmer(fm, kmer):
break
return found


def generate_protein_sequences(genome):
"""
Generator whose elements are protein sequences from the given genome.
Expand All @@ -112,6 +115,7 @@ def generate_protein_sequences(genome):
protein_sequence = protein_sequence.encode("ascii")
yield protein_sequence


def load_reference_peptides_index(genome, force_reload=False):
"""
Loads the FM index containing reference peptides.
Expand All @@ -138,6 +142,7 @@ def load_reference_peptides_index(genome, force_reload=False):
return fm
return shellinford.FMIndex(filename=path)


def predict_epitopes(
mhc_predictor,
protein_fragment,
Expand Down Expand Up @@ -175,7 +180,7 @@ def predict_epitopes(
try:
mhctools_binding_predictions = mhc_predictor.predict_subsequences(
{protein_fragment.gene_name: protein_fragment.amino_acids})
except Exception as exc:
except:
logger.error(
'MHC prediction errored for protein fragment %s, with traceback: %s',
protein_fragment, traceback.format_exc())
Expand Down Expand Up @@ -212,11 +217,11 @@ def predict_epitopes(
]
if len(valid_wt_peptides) > 0:
wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
except ValueError as err:
except:
logger.error(
'MHC prediction for WT peptides errored, with traceback: %s',
traceback.format_exc())

wt_predictions_grouped = {}
# break it out: (peptide, allele) -> prediction
for wt_prediction in wt_predictions:
Expand Down Expand Up @@ -264,28 +269,32 @@ def predict_epitopes(
wt_ic50 = binding_prediction.value

epitope_prediction = EpitopePrediction(
allele=binding_prediction.allele,
peptide_sequence=peptide,
wt_peptide_sequence=wt_peptide,
length=len(peptide),
ic50=binding_prediction.value,
wt_ic50=wt_ic50,
percentile_rank=binding_prediction.percentile_rank,
prediction_method_name=binding_prediction.prediction_method_name,
overlaps_mutation=overlaps_mutation,
source_sequence=protein_fragment.amino_acids,
offset=peptide_start_offset,
occurs_in_reference=occurs_in_reference)
allele=binding_prediction.allele,
peptide_sequence=peptide,
wt_peptide_sequence=wt_peptide,
length=len(peptide),
ic50=binding_prediction.value,
wt_ic50=wt_ic50,
percentile_rank=binding_prediction.percentile_rank,
prediction_method_name=binding_prediction.prediction_method_name,
overlaps_mutation=overlaps_mutation,
source_sequence=protein_fragment.amino_acids,
offset=peptide_start_offset,
occurs_in_reference=occurs_in_reference)
if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
results[key] = epitope_prediction
else:
num_low_scoring += 1

logger.info('%d total peptides: %d occur in reference, %d failed score threshold',
num_total, num_occurs_in_reference, num_low_scoring)
logger.info(
"%d total peptides: %d occur in reference, %d failed score threshold",
num_total,
num_occurs_in_reference,
num_low_scoring)
return results


def slice_epitope_predictions(
epitope_predictions,
start_offset,
Expand Down
Loading

0 comments on commit 9334465

Please sign in to comment.