PEP8 and style changes, made GenePathwayCheck easier to read

openvax · Oct 10, 2018 · 9334465 · 9334465
1 parent 47decce
commit 9334465
Show file tree

Hide file tree

Showing 7 changed files with 154 additions and 123 deletions.
diff --git a/vaxrank/cli.py b/vaxrank/cli.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016. Mount Sinai School of Medicine
+# Copyright (c) 2016-2018. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -216,7 +216,9 @@ def add_vaccine_peptide_args(arg_parser):
         "--min-epitope-score",
         default=1e-10,
         type=float,
-        help="Ignore epitopes whose normalized score falls below this threshold")
+        help=(
+            "Ignore predicted MHC ligands whose normalized binding score "
+            "falls below this threshold"))
 
 
 def add_supplemental_report_args(arg_parser):

diff --git a/vaxrank/core_logic.py b/vaxrank/core_logic.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016. Mount Sinai School of Medicine
+# Copyright (c) 2016-2018. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,11 +13,10 @@
 # limitations under the License.
 
 from __future__ import absolute_import, print_function, division
-from collections import defaultdict, namedtuple, OrderedDict
+from collections import OrderedDict
 import logging
 
-from numpy import isclose, asarray
-import pandas as pd
+from numpy import isclose
 
 from isovar.protein_sequences import (
     reads_generator_to_protein_sequences_generator,
@@ -26,12 +25,11 @@
 from .mutant_protein_fragment import MutantProteinFragment
 from .epitope_prediction import predict_epitopes, slice_epitope_predictions
 from .vaccine_peptide import VaccinePeptide
-from .gene_pathway_check import GenePathwayCheck
-
 
 logger = logging.getLogger(__name__)
 
-class VaxrankCoreLogic:
+
+class VaxrankCoreLogic(object):
     def __init__(
             self,
             variants,
@@ -47,50 +45,53 @@ def __init__(
             min_epitope_score=0.0,
             gene_pathway_check=None):
         """
-        Construct a VaxrankCoreLogic object.
-
         Parameters
         ----------
         variants : VariantCollection
             Variant objects to evaluate for vaccine inclusion
 
         reads_generator : generator
-            Yields sequence of varcode.Variant objects paired with sequences of AlleleRead objects
-            that support that variant.
+            Yields sequence of varcode.Variant objects paired with sequences of
+            AlleleRead objects that support that variant.
 
         mhc_predictor : mhctools.BasePredictor
-            Object with predict_peptides method, used for making pMHC binding predictions
+            Object with predict_peptides method, used for making pMHC binding
+            predictions
 
         vaccine_peptide_length : int
             Length of vaccine SLP to construct
 
         padding_around_mutation : int
-            Number of off-center windows around the mutation to consider as vaccine peptides.
+            Number of off-center windows around the mutation to consider as vaccine
+            peptides.
 
         max_vaccine_peptides_per_variant : int
             Number of vaccine peptides to generate for each mutation.
 
         min_alt_rna_reads : int
-            Drop variant sequences at loci with fewer than this number of reads supporting the alt
-            allele.
+            Drop variant sequences at loci with fewer than this number of reads
+            supporting the alt allele.
 
         min_variant_sequence_coverage : int
-            Trim variant sequences to positions supported by at least this number of RNA reads.
+            Trim variant sequences to positions supported by at least this number
+            of RNA reads.
 
         variant_sequence_assembly : int
-            If True, then assemble variant cDNA sequences based on overlap of RNA reads. If False,
-            then variant cDNA sequences must be fully spanned and contained within RNA reads.
+            If True, then assemble variant cDNA sequences based on overlap of RNA
+            reads. If False, then variant cDNA sequences must be fully spanned and
+            contained within RNA reads.
 
         num_mutant_epitopes_to_keep : int, optional
-            Number of top-ranking epitopes for each vaccine peptide to include in computation.
+            Number of top-ranking epitopes for each vaccine peptide to include in
+            computation.
 
         min_epitope_score : float, optional
-            Ignore peptides with binding predictions whose normalized score is less than this.
-
-        gene_pathway_check : GenePathwayCheck object, optional
-            If provided, will check against known pathways/gene sets/variant sets and include the
-            info in the all-variants output file.
+            Ignore peptides with binding predictions whose normalized score is less
+            than this.
 
+        gene_pathway_check : GenePathwayCheck, optional
+            If provided, will check against known pathways/gene sets/variant sets and
+            include the info in the all-variants output file.
         """
         self.variants = variants
         self.reads_generator = reads_generator
@@ -272,7 +273,7 @@ def ranked_vaccine_peptides(
         """
         This function returns a sorted list whose first element is a Variant and whose second
         element is a list of VaccinePeptide objects.
-        """            
+        """
         variant_peptides_dict = self.vaccine_peptides
         result_list = list(variant_peptides_dict.items())
         # TODO: move this sort key into its own function, also make less nuts

diff --git a/vaxrank/epitope_prediction.py b/vaxrank/epitope_prediction.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016. Mount Sinai School of Medicine
+# Copyright (c) 2016-2018. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -72,6 +72,7 @@ def logistic_epitope_score(
 
         return logistic / normalizer
 
+
 def fm_index_path(genome):
     """
     Returns a path for cached reference peptides, for the given genome.
@@ -84,6 +85,7 @@ def fm_index_path(genome):
     return os.path.join(cache_dir, '%s_%d_%d.fm' % (
         genome.species.latin_name, genome.release, 2 if six.PY2 else 3))
 
+
 def index_contains_kmer(fm, kmer):
     """
     Checks FM index for kmer, returns true if found.
@@ -94,6 +96,7 @@ def index_contains_kmer(fm, kmer):
         break
     return found
 
+
 def generate_protein_sequences(genome):
     """
     Generator whose elements are protein sequences from the given genome.
@@ -112,6 +115,7 @@ def generate_protein_sequences(genome):
                 protein_sequence = protein_sequence.encode("ascii")
             yield protein_sequence
 
+
 def load_reference_peptides_index(genome, force_reload=False):
     """
     Loads the FM index containing reference peptides.
@@ -138,6 +142,7 @@ def load_reference_peptides_index(genome, force_reload=False):
         return fm
     return shellinford.FMIndex(filename=path)
 
+
 def predict_epitopes(
         mhc_predictor,
         protein_fragment,
@@ -175,7 +180,7 @@ def predict_epitopes(
     try:
         mhctools_binding_predictions = mhc_predictor.predict_subsequences(
             {protein_fragment.gene_name: protein_fragment.amino_acids})
-    except Exception as exc:
+    except:
         logger.error(
             'MHC prediction errored for protein fragment %s, with traceback: %s',
             protein_fragment, traceback.format_exc())
@@ -212,11 +217,11 @@ def predict_epitopes(
         ]
         if len(valid_wt_peptides) > 0:
             wt_predictions = mhc_predictor.predict_peptides(valid_wt_peptides)
-    except ValueError as err:
+    except:
         logger.error(
             'MHC prediction for WT peptides errored, with traceback: %s',
             traceback.format_exc())
-        
+
     wt_predictions_grouped = {}
     # break it out: (peptide, allele) -> prediction
     for wt_prediction in wt_predictions:
@@ -264,28 +269,32 @@ def predict_epitopes(
             wt_ic50 = binding_prediction.value
 
         epitope_prediction = EpitopePrediction(
-                allele=binding_prediction.allele,
-                peptide_sequence=peptide,
-                wt_peptide_sequence=wt_peptide,
-                length=len(peptide),
-                ic50=binding_prediction.value,
-                wt_ic50=wt_ic50,
-                percentile_rank=binding_prediction.percentile_rank,
-                prediction_method_name=binding_prediction.prediction_method_name,
-                overlaps_mutation=overlaps_mutation,
-                source_sequence=protein_fragment.amino_acids,
-                offset=peptide_start_offset,
-                occurs_in_reference=occurs_in_reference)
+            allele=binding_prediction.allele,
+            peptide_sequence=peptide,
+            wt_peptide_sequence=wt_peptide,
+            length=len(peptide),
+            ic50=binding_prediction.value,
+            wt_ic50=wt_ic50,
+            percentile_rank=binding_prediction.percentile_rank,
+            prediction_method_name=binding_prediction.prediction_method_name,
+            overlaps_mutation=overlaps_mutation,
+            source_sequence=protein_fragment.amino_acids,
+            offset=peptide_start_offset,
+            occurs_in_reference=occurs_in_reference)
         if epitope_prediction.logistic_epitope_score() >= min_epitope_score:
             key = (epitope_prediction.peptide_sequence, epitope_prediction.allele)
             results[key] = epitope_prediction
         else:
             num_low_scoring += 1
 
-    logger.info('%d total peptides: %d occur in reference, %d failed score threshold',
-        num_total, num_occurs_in_reference, num_low_scoring)
+    logger.info(
+        "%d total peptides: %d occur in reference, %d failed score threshold",
+        num_total,
+        num_occurs_in_reference,
+        num_low_scoring)
     return results
 
+
 def slice_epitope_predictions(
         epitope_predictions,
         start_offset,