From bb498aef116833c719e76a38333003e4d6d8d924 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 14:41:54 -0500 Subject: [PATCH 01/19] removed namedtuple, other small changes in preparation for a much larger rewrite --- setup.py | 10 +- test/test_cancer_driver_gene.py | 13 +++ test/test_epitope_prediction.py | 21 +++- test/test_manufacturability.py | 13 +++ test/test_mutant_protein_sequence.py | 2 - test/test_shell_script.py | 13 +++ test/testing_helpers.py | 2 - vaxrank/__init__.py | 2 +- vaxrank/cli.py | 2 - vaxrank/core_logic.py | 2 - vaxrank/epitope_prediction.py | 149 ++++++++++++++++++--------- vaxrank/gene_pathway_check.py | 2 - vaxrank/manufacturability.py | 2 - vaxrank/mutant_protein_fragment.py | 116 ++++++++++++--------- vaxrank/reference_proteome.py | 2 - vaxrank/report.py | 16 +-- vaxrank/vaccine_peptide.py | 82 +++++++-------- 17 files changed, 269 insertions(+), 180 deletions(-) diff --git a/setup.py b/setup.py index dfd7fec..e3ddee7 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -45,12 +43,12 @@ name='vaxrank', version=version, description="Mutant peptide ranking for personalized cancer vaccines", - author="Alex Rubinsteyn", - author_email="alex.rubinsteyn@gmail.com", - url="https://github.com/hammerlab/vaxrank", + author="Alex Rubinsteyn, Julia Kodysh", + author_email="alex@openvax.org, julia@openvax.org", + url="https://github.com/openvax/vaxrank", license="http://www.apache.org/licenses/LICENSE-2.0.html", classifiers=[ - 'Development Status :: 3 - Alpha', + 'Development Status :: 4 - Beta', 'Environment :: Console', 'Operating System :: OS Independent', 'Intended Audience :: Science/Research', diff --git a/test/test_cancer_driver_gene.py b/test/test_cancer_driver_gene.py index 3e4552e..b9a3fe9 100644 --- a/test/test_cancer_driver_gene.py +++ b/test/test_cancer_driver_gene.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from vaxrank.gene_pathway_check import ( GenePathwayCheck, _IFNG_RESPONSE_COLUMN_NAME, diff --git a/test/test_epitope_prediction.py b/test/test_epitope_prediction.py index 789a338..7b0f285 100644 --- a/test/test_epitope_prediction.py +++ b/test/test_epitope_prediction.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +17,7 @@ from mhctools import RandomBindingPredictor from pyensembl import genome_for_reference_name from varcode import Variant -from vaxrank.epitope_prediction import predict_epitopes +from vaxrank.epitope_prediction import predict_epitopes, EpitopePrediction from vaxrank.mutant_protein_fragment import MutantProteinFragment from vaxrank.vaccine_peptide import VaccinePeptide @@ -89,3 +87,20 @@ def predict_subsequences(self, x): genome=mouse_genome) eq_(0, len(epitope_predictions)) + +def test_EpitopePrediction_json_serialization(): + e = EpitopePrediction( + allele="HLA-A*02:01", + peptide_sequence="SIINFEQL", + ic50=2.0, + wt_peptide_sequence="SIINFEKL", + wt_ic50=2000.0, + percentile_rank=0.3, + prediction_method_name="ImaginationMHCpan", + overlaps_mutation=True, + source_sequence="SSIINFEQL", + offset=1, + occurs_in_reference=False) + json = e.to_json() + e2 = EpitopePrediction.from_json((json)) + eq_(e, e2) \ No newline at end of file diff --git a/test/test_manufacturability.py b/test/test_manufacturability.py index 740ef4d..fc39535 100644 --- a/test/test_manufacturability.py +++ b/test/test_manufacturability.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from vaxrank.manufacturability import ManufacturabilityScores diff --git a/test/test_mutant_protein_sequence.py b/test/test_mutant_protein_sequence.py index 598acce..55cc822 100644 --- a/test/test_mutant_protein_sequence.py +++ b/test/test_mutant_protein_sequence.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/test/test_shell_script.py b/test/test_shell_script.py index acfde3f..02465e7 100644 --- a/test/test_shell_script.py +++ b/test/test_shell_script.py @@ -1,3 +1,16 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + from os.path import getsize from mock import patch from nose.plugins.attrib import attr diff --git a/test/testing_helpers.py b/test/testing_helpers.py index 22248a1..3b241ac 100644 --- a/test/testing_helpers.py +++ b/test/testing_helpers.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/__init__.py b/vaxrank/__init__.py index 7863915..6849410 100644 --- a/vaxrank/__init__.py +++ b/vaxrank/__init__.py @@ -1 +1 @@ -__version__ = "1.0.2" +__version__ = "1.1.0" diff --git a/vaxrank/cli.py b/vaxrank/cli.py index d0426e9..ce78da9 100644 --- a/vaxrank/cli.py +++ b/vaxrank/cli.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/core_logic.py b/vaxrank/core_logic.py index 6db6a26..9cfc904 100644 --- a/vaxrank/core_logic.py +++ b/vaxrank/core_logic.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/epitope_prediction.py b/vaxrank/epitope_prediction.py index eebd6b2..aabba0c 100644 --- a/vaxrank/epitope_prediction.py +++ b/vaxrank/epitope_prediction.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,33 +11,57 @@ # limitations under the License. from __future__ import absolute_import, print_function, division -from collections import namedtuple, OrderedDict +from collections import OrderedDict import traceback import logging import numpy as np +from serializable import Serializable from .reference_proteome import ReferenceProteome -logger = logging.getLogger(__name__) -EpitopePredictionBase = namedtuple("EpitopePrediction", [ - "allele", - "peptide_sequence", - "wt_peptide_sequence", - "length", - "ic50", - "wt_ic50", - "percentile_rank", - "prediction_method_name", - "overlaps_mutation", - "source_sequence", - "offset", - "occurs_in_reference", -]) +logger = logging.getLogger(__name__) -class EpitopePrediction(EpitopePredictionBase): +class EpitopePrediction(Serializable): + def __init__( + self, + allele, + peptide_sequence, + wt_peptide_sequence, + ic50, + wt_ic50, + percentile_rank, + prediction_method_name, + overlaps_mutation, + source_sequence, + offset, + occurs_in_reference): + self.allele = allele + self.peptide_sequence = peptide_sequence + self.wt_peptide_sequence = wt_peptide_sequence + self.length = len(peptide_sequence) + self.ic50 = ic50 + self.wt_ic50 = wt_ic50 + self.percentile_rank = percentile_rank + self.prediction_method_name = prediction_method_name + self.overlaps_mutation = overlaps_mutation + self.source_sequence = source_sequence + self.offset = offset + self.overlaps_mutation = overlaps_mutation + self.occurs_in_reference = occurs_in_reference + + @classmethod + def from_dict(cls, d): + """ + Deserialize EpitopePrediction from a dictionary of keywords. + """ + d = d.copy() + if "length" in d: + # lenth argument removed in version 1.1.0 + del d["length"] + return cls(**d) def logistic_epitope_score( self, @@ -70,6 +92,56 @@ def logistic_epitope_score( return logistic / normalizer + def slice_source_sequence(self, start_offset, end_offset): + """ + + Parameters + ---------- + start_offset : int + + end_offset : int + + Return EpitopePrediction object with source sequence and offset + adjusted. If this slicing would shorten the mutant peptide, then + return None. + """ + if self.offset < start_offset: + # this peptide starts before the requested slice through the + # source sequence + return None + + if self.offset + self.length > end_offset: + # this peptide goes beyond the end of the requested slice + # through the source sequence + return None + + return EpitopePrediction( + allele=self.allele, + peptide_sequence=self.peptide_sequence, + wt_peptide_sequence=self.wt_peptide_sequence, + ic50=self.ic50, + wt_ic50=self.wt_ic50, + percentile_rank=self.percentile_rank, + prediction_method_name=self.prediction_method_name, + overlaps_mutation=self.overlaps_mutation, + source_sequence=self.source_sequence[start_offset:end_offset], + offset=self.offset - start_offset, + occurs_in_reference=self.occurs_in_reference) + + +def slice_epitope_predictions( + epitope_predictions, + start_offset, + end_offset): + """ + Return subset of EpitopePrediction objects which overlap the given interval + and slice through their source sequences and adjust their offset. + """ + return [ + p.slice_source_sequence(start_offset, end_offset) + for p in epitope_predictions + if p.offset >= start_offset and p.offset + p.length <= end_offset + ] def predict_epitopes( mhc_predictor, @@ -150,10 +222,11 @@ def predict_epitopes( 'MHC prediction for WT peptides errored, with traceback: %s', traceback.format_exc()) - wt_predictions_grouped = {} # break it out: (peptide, allele) -> prediction - for wt_prediction in wt_predictions: - wt_predictions_grouped[(wt_prediction.peptide, wt_prediction.allele)] = wt_prediction + wt_predictions_grouped = { + (wt_prediction.peptide, wt_prediction.allele): wt_prediction + for wt_prediction in wt_predictions + } # convert from mhctools.BindingPrediction objects to EpitopePrediction # which differs primarily by also having a boolean field @@ -181,7 +254,8 @@ def predict_epitopes( # compute WT epitope sequence, if this epitope overlaps the mutation if overlaps_mutation: wt_peptide = wt_peptides[peptide] - wt_prediction = wt_predictions_grouped.get((wt_peptide, binding_prediction.allele)) + wt_prediction = wt_predictions_grouped.get( + (wt_peptide, binding_prediction.allele)) wt_ic50 = None if wt_prediction is None: # this can happen in a stop-loss variant: do we want to check that here? @@ -200,7 +274,6 @@ def predict_epitopes( allele=binding_prediction.allele, peptide_sequence=peptide, wt_peptide_sequence=wt_peptide, - length=len(peptide), ic50=binding_prediction.value, wt_ic50=wt_ic50, percentile_rank=binding_prediction.percentile_rank, @@ -209,6 +282,7 @@ def predict_epitopes( source_sequence=protein_fragment.amino_acids, offset=peptide_start_offset, occurs_in_reference=occurs_in_reference) + if epitope_prediction.logistic_epitope_score() >= min_epitope_score: key = (epitope_prediction.peptide_sequence, epitope_prediction.allele) results[key] = epitope_prediction @@ -221,30 +295,3 @@ def predict_epitopes( num_occurs_in_reference, num_low_scoring) return results - - -def slice_epitope_predictions( - epitope_predictions, - start_offset, - end_offset): - """ - Return subset of EpitopePrediction objects which overlap the given interval - and slice through their source sequences and adjust their offset. - """ - return [ - EpitopePrediction( - allele=p.allele, - peptide_sequence=p.peptide_sequence, - wt_peptide_sequence=p.wt_peptide_sequence, - length=p.length, - ic50=p.ic50, - wt_ic50=p.wt_ic50, - percentile_rank=p.percentile_rank, - prediction_method_name=p.prediction_method_name, - overlaps_mutation=p.overlaps_mutation, - source_sequence=p.source_sequence[start_offset:end_offset], - offset=p.offset - start_offset, - occurs_in_reference=p.occurs_in_reference) - for p in epitope_predictions - if p.offset >= start_offset and p.offset + p.length <= end_offset - ] diff --git a/vaxrank/gene_pathway_check.py b/vaxrank/gene_pathway_check.py index 698ef39..91131f7 100644 --- a/vaxrank/gene_pathway_check.py +++ b/vaxrank/gene_pathway_check.py @@ -1,5 +1,3 @@ -# Copyright (c) 2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/manufacturability.py b/vaxrank/manufacturability.py index 5034c03..36d9d70 100644 --- a/vaxrank/manufacturability.py +++ b/vaxrank/manufacturability.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/mutant_protein_fragment.py b/vaxrank/mutant_protein_fragment.py index e60c9b0..170dce7 100644 --- a/vaxrank/mutant_protein_fragment.py +++ b/vaxrank/mutant_protein_fragment.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,57 +12,78 @@ from __future__ import absolute_import, print_function, division -from collections import namedtuple import logging from varcode.effects import top_priority_effect - +from serializable import Serializable logger = logging.getLogger(__name__) -# using a namedtuple base class for the immutable fields of a MutantProteinFragment -# since it makes it clearer what the essential information is and provides -# useful comparison/hashing methods - -MutantProteinFragmentBase = namedtuple("MutantProteinFragment", ( - # varcode.Variant - "variant", - # gene and transcript(s) which were used to translate one or more - # variant cDNA sequences into the following amino acids - "gene_name", - - ### - # Translated protein sequence, aggregated from possibly multiple - # synonymous coding sequences - ### - - "amino_acids", - # offsets of amino acids which differ due to the mutation - "mutant_amino_acid_start_offset", - "mutant_amino_acid_end_offset", - - # PyEnsembl Transcript objects for reference transcripts which - # were used to establish the reading frame of coding sequence(s) - # detected from RNA - "supporting_reference_transcripts", - - ### - # RNA evidence - ### - - # number of reads overlapping the variant locus - "n_overlapping_reads", - # number of reads supporting the variant - "n_alt_reads", - # number of reads supporting the reference allele - "n_ref_reads", - - # number of RNA reads fully spanning the cDNA sequence(s) from which we - # translated this amino acid sequence. - "n_alt_reads_supporting_protein_sequence", -)) - -class MutantProteinFragment(MutantProteinFragmentBase): + +class MutantProteinFragment(Serializable): + def __init__( + self, + variant, + gene_name, + amino_acids, + mutant_amino_acid_start_offset, + mutant_amino_acid_end_offset, + supporting_reference_transcripts, + n_overlapping_reads, + n_alt_reads, + n_ref_reads, + n_alt_reads_supporting_protein_sequence): + """ + Parameters + ---------- + variant : varcode.Variant + Somatic mutation. + + gene_name : str + Gene from which we used a transcript to translate this mutation. + + amino_acids : str + Translated protein sequence, aggregated from possibly multiple + synonymous coding sequences. + + mutant_amino_acid_start_offset : int + Starting offset of amino acids which differ due to the mutation + + mutant_amino_acid_end_offset : int + End offset of amino acids which differ due to the mutation + + + supporting_reference_transcripts : list of pyensembl.Transcript + PyEnsembl Transcript objects for reference transcripts which + were used to establish the reading frame of coding sequence(s) + detected from RNA. + + n_overlapping_reads : int + Number of reads overlapping the variant locus. + + n_alt_reads : int + Number of reads supporting the variant. + + n_ref_reads : int + Number of reads supporting the reference allele. + + n_alt_reads_supporting_protein_sequence : int + Number of RNA reads fully spanning the cDNA sequence(s) from which + we translated this amino acid sequence. + """ + self.variant = variant + self.gene_name = gene_name + self.amino_acids = amino_acids + self.mutant_amino_acid_start_offset = mutant_amino_acid_start_offset + self.mutant_amino_acid_end_offset = mutant_amino_acid_end_offset + self.supporting_reference_transcripts = \ + supporting_reference_transcripts + self.n_overlapping_reads = n_overlapping_reads + self.n_alt_reads = n_alt_reads + self.n_ref_reads = n_ref_reads + self.n_alt_reads_supporting_protein_sequence = \ + n_alt_reads_supporting_protein_sequence + @classmethod def from_isovar_protein_sequence(cls, variant, protein_sequence): return cls( @@ -185,7 +204,8 @@ def global_start_pos(self): # position of mutation start relative to the full amino acid sequence global_mutation_start_pos = self.predicted_effect().aa_mutation_start_offset if global_mutation_start_pos is None: - logger.error('Could not find mutation start pos for variant %s', + logger.error( + 'Could not find mutation start pos for variant %s', self.variant) return -1 diff --git a/vaxrank/reference_proteome.py b/vaxrank/reference_proteome.py index ed2f79f..4f15d45 100644 --- a/vaxrank/reference_proteome.py +++ b/vaxrank/reference_proteome.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/vaxrank/report.py b/vaxrank/report.py index 63bd1a2..3cd3d68 100644 --- a/vaxrank/report.py +++ b/vaxrank/report.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016-2018. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,7 +11,7 @@ # limitations under the License. from __future__ import absolute_import, division -from collections import namedtuple, OrderedDict +from collections import OrderedDict from importlib import import_module import logging import os @@ -42,18 +40,6 @@ ) -PatientInfo = namedtuple("PatientInfo", ( - "patient_id", - "vcf_paths", - "bam_path", - "mhc_alleles", - "num_somatic_variants", - "num_coding_effect_variants", - "num_variants_with_rna_support", - "num_variants_with_vaccine_peptides", -)) - - class TemplateDataCreator(object): def __init__( self, diff --git a/vaxrank/vaccine_peptide.py b/vaxrank/vaccine_peptide.py index a8785c0..cd4bd91 100644 --- a/vaxrank/vaccine_peptide.py +++ b/vaxrank/vaccine_peptide.py @@ -1,5 +1,3 @@ -# Copyright (c) 2016. Mount Sinai School of Medicine -# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,60 +13,52 @@ from __future__ import absolute_import, print_function, division -from collections import namedtuple from operator import attrgetter import numpy as np +from serializable import Serializable -from .manufacturability import ManufacturabilityScores - -VaccinePeptideBase = namedtuple( - "VaccinePeptide", [ - "mutant_protein_fragment", - "mutant_epitope_predictions", - "wildtype_epitope_predictions", - "mutant_epitope_score", - "wildtype_epitope_score", - "num_mutant_epitopes_to_keep", - "manufacturability_scores"]) - -class VaccinePeptide(VaccinePeptideBase): +class VaccinePeptide(Serializable): """ VaccinePeptide combines the sequence information of MutantProteinFragment with MHC binding predictions for subsequences of the protein fragment. - The resulting lists of mutant and wildtype epitope predictions are sorted by ic50. + The resulting lists of mutant and wildtype epitope predictions + are sorted by affinity. """ - def __new__( - cls, + + def __init__( + self, mutant_protein_fragment, epitope_predictions, - num_mutant_epitopes_to_keep=10000): + num_mutant_epitopes_to_keep=10000, + sort_predictions_by='ic50'): + self.mutant_protein_fragment = mutant_protein_fragment + self.epitope_predictions = epitope_predictions + self.num_mutant_epitopes_to_keep = num_mutant_epitopes_to_keep + self.sort_predictions_by = sort_predictions_by + + sort_key = attrgetter(sort_predictions_by) + # only keep the top k epitopes - mutant_epitope_predictions = sorted([ - p for p in epitope_predictions if p.overlaps_mutation and not p.occurs_in_reference - ], key=attrgetter('ic50'))[:num_mutant_epitopes_to_keep] - wildtype_epitope_predictions = sorted([ - p for p in epitope_predictions if not p.overlaps_mutation or p.occurs_in_reference - ], key=attrgetter('ic50')) - - wildtype_epitope_score = sum( - p.logistic_epitope_score() for p in wildtype_epitope_predictions) + self.mutant_epitope_predictions = sorted([ + p for p in epitope_predictions + if p.overlaps_mutation and not p.occurs_in_reference + ], key=sort_key)[:self.num_mutant_epitopes_to_keep] + + self.wildtype_epitope_predictions = sorted([ + p for p in epitope_predictions + if not p.overlaps_mutation or p.occurs_in_reference + ], key=sort_key) + + self.wildtype_epitope_score = sum( + p.logistic_epitope_score() + for p in self.wildtype_epitope_predictions) # only keep the top k epitopes for the purposes of the score - mutant_epitope_score = sum( - p.logistic_epitope_score() for p in mutant_epitope_predictions) - - return VaccinePeptideBase.__new__( - cls, - mutant_protein_fragment=mutant_protein_fragment, - mutant_epitope_predictions=mutant_epitope_predictions, - wildtype_epitope_predictions=wildtype_epitope_predictions, - mutant_epitope_score=mutant_epitope_score, - wildtype_epitope_score=wildtype_epitope_score, - num_mutant_epitopes_to_keep=num_mutant_epitopes_to_keep, - manufacturability_scores=ManufacturabilityScores.from_amino_acids( - mutant_protein_fragment.amino_acids)) + self.mutant_epitope_score = sum( + p.logistic_epitope_score() + for p in self.mutant_epitope_predictions) def peptide_synthesis_difficulty_score_tuple( self, @@ -207,4 +197,12 @@ def to_dict(self): "mutant_protein_fragment": self.mutant_protein_fragment, "epitope_predictions": epitope_predictions, "num_mutant_epitopes_to_keep": self.num_mutant_epitopes_to_keep, + "sort_predictions_by": self.sort_predictions_by, } + + @classmethod + def from_dict(cls, d): + d = d.copy() + if "sort_predictions_by" not in d: + d["sort_predictions_by"] = "ic50" + return cls(**d) From d7173ebe193731cdf71d5386f3e4a4f879cefcc6 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 14:56:14 -0500 Subject: [PATCH 02/19] correct import path for PatientInfo --- vaxrank/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vaxrank/cli.py b/vaxrank/cli.py index ce78da9..51cc09d 100644 --- a/vaxrank/cli.py +++ b/vaxrank/cli.py @@ -40,8 +40,8 @@ make_csv_report, make_minimal_neoepitope_report, TemplateDataCreator, - PatientInfo, ) +from .patient_info import PatientInfo logger = logging.getLogger(__name__) From da863b8555308b0de064fb1233f59dfa4d7c2345 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 15:14:19 -0500 Subject: [PATCH 03/19] added missing patient_info module --- vaxrank/patient_info.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 vaxrank/patient_info.py diff --git a/vaxrank/patient_info.py b/vaxrank/patient_info.py new file mode 100644 index 0000000..6418247 --- /dev/null +++ b/vaxrank/patient_info.py @@ -0,0 +1,21 @@ +from serializable import Serializable + +class PatientInfo(Serializable): + def __init__( + self, + patient_id, + vcf_paths, + bam_path, + mhc_alleles, + num_somatic_variants, + num_coding_effect_variants, + num_variants_with_rna_support, + num_variants_with_vaccine_peptides): + self.patient_id = patient_id + self.vcf_paths = vcf_paths + self.bam_path = bam_path + self.mhc_alleles = mhc_alleles + self.num_somatic_variants = num_somatic_variants + self.num_coding_effect_variants = num_coding_effect_variants + self.num_variants_with_rna_support = num_variants_with_rna_support + self.num_variants_with_vaccine_peptides = num_variants_with_vaccine_peptides From 8dc13da88b7b67c4219b41c250d42bf38632b2e8 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 15:32:54 -0500 Subject: [PATCH 04/19] run tests in py3.5 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index b000eb2..f50fec6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,7 @@ sudo: true language: python python: - "2.7" - - "3.6" + - "3.5" git: # don't need the default depth of 50 # but don't want to use a depth of 1 since that affects From 261324f3cfbf296d11d945f51194d1f4be15f5d7 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 16:00:02 -0500 Subject: [PATCH 05/19] added manufacturability_scores back to vaccine_peptide --- vaxrank/vaccine_peptide.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vaxrank/vaccine_peptide.py b/vaxrank/vaccine_peptide.py index cd4bd91..65c5c8e 100644 --- a/vaxrank/vaccine_peptide.py +++ b/vaxrank/vaccine_peptide.py @@ -17,6 +17,7 @@ import numpy as np from serializable import Serializable +from manufacturability import ManufacturabilityScores class VaccinePeptide(Serializable): @@ -60,6 +61,10 @@ def __init__( p.logistic_epitope_score() for p in self.mutant_epitope_predictions) + self.manufacturability_scores = \ + ManufacturabilityScores.from_amino_acids( + self.mutant_protein_fragment.amino_acids) + def peptide_synthesis_difficulty_score_tuple( self, max_c_terminal_hydropathy=1.5, From b2135239afdcb6ff41e41a3b35001cd8b8be5e5a Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 19:23:38 -0500 Subject: [PATCH 06/19] fixed manufacturability import --- vaxrank/vaccine_peptide.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vaxrank/vaccine_peptide.py b/vaxrank/vaccine_peptide.py index 65c5c8e..4aeefe6 100644 --- a/vaxrank/vaccine_peptide.py +++ b/vaxrank/vaccine_peptide.py @@ -17,7 +17,8 @@ import numpy as np from serializable import Serializable -from manufacturability import ManufacturabilityScores + +from .manufacturability import ManufacturabilityScores class VaccinePeptide(Serializable): From a8e16b1968b05e629d1b85276afcc14ae5f3a71e Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 20:13:29 -0500 Subject: [PATCH 07/19] changed requirements to specify wrapt version that will work with python 3.5 on travis --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 6636654..0792c04 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,3 +16,4 @@ xvfbwrapper future>=0.16.0 # needed by pylint astropy datacache +wrapt>=1.11.2 From d713b621efc69f3a37a2e9ac7f900cb1b1f3ebb0 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 20:18:03 -0500 Subject: [PATCH 08/19] try fixing via setup tools --- .travis.yml | 6 ++++++ requirements.txt | 1 - 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index f50fec6..a46ec59 100644 --- a/.travis.yml +++ b/.travis.yml @@ -22,6 +22,12 @@ cache: - $HOME/.cache/pyensembl/GRCm38/ensembl93/ timeout: 300 before_install: + - | + # possibly required to fix Travis for py3.5 but we should get rid + # of this when using py3.6+ + if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then + pip install setuptools --upgrade + fi - | if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; diff --git a/requirements.txt b/requirements.txt index 0792c04..6636654 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,3 @@ xvfbwrapper future>=0.16.0 # needed by pylint astropy datacache -wrapt>=1.11.2 From cc77c6f69280ce20df522ba7d8291b34d6813c5e Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 20:27:46 -0500 Subject: [PATCH 09/19] try installing newer wrapt for py3.5 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index a46ec59..a564013 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,6 +27,7 @@ before_install: # of this when using py3.6+ if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then pip install setuptools --upgrade + pip install wrapt --upgrade fi - | if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then From 4aac8adbe27ea06e9a04609ea04db41909077537 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 21:01:33 -0500 Subject: [PATCH 10/19] got rid of py2.7 on Travis -- who needs it anymore? --- .travis.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index a564013..4deb8c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,7 +3,6 @@ sudo: true language: python python: - - "2.7" - "3.5" git: # don't need the default depth of 50 From a0b93e876c68a96a44fe57088a5c8ef3d6e40585 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 21:02:19 -0500 Subject: [PATCH 11/19] push to PyPI for 3.x instead of 2.7 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 4deb8c7..5fd1380 100644 --- a/.travis.yml +++ b/.travis.yml @@ -100,4 +100,4 @@ deploy: secure: "kt07TyCxiumbu2SGZvl7J5LLzsLq0zxcCceOSGb4RE0tFOmiUmDv5LfGKME5lEFhPxleChaq/AILDOudEZO3H9xJY90/dXPgyXJOd6pCOzKq3zIKEjz3MEx3wBizcjR7ucHxYxDQzIBEBeoC4wTIc53mBR5gMgqyG8Dbciueg9Yo5qZED9rq/uY1ELxW2iQsS9LBK7n/blTPTisdmdG3g64eVEDsZD/0egPdupWhlkNuUQlA4LVr2KUifGrtXPL0A/m3CUEABJWre7PJ6motFS1KramP7/PQJwpXmZEHI1ayknNeN2ndMp4Ni64cEhtF/JIRYWBuGxzZXSuIv1g8XxKijm/qxSc4Gv0yIiP/Q3pZ5hfSC1RZhcpkc3ugCSOy5VwlF5O0P/EflMnsUE9ZRFON3s5+yjsJtNldsCRpdEsquFPn0BJC4kOdWKfIkYuhln6xcJawl+8uKlCJuOa1vZcsJ16E6RPN3TpkfChaMVBrD6zEbYZfrI5QCMV/uwkwutgV4H95FR0TlnDrOTwAMamxFBN7R8+YEHff9n21c4i7vzjBVlI16W/t2qqmaG/0w1hBhAiYvvV5BLh84km5gSewcjcKjBHVhYscF1Xxyq0CPmKdcOB6ePFbIWh/+NHQzDpzY8yKp9kzkZizZJGUAxsI2ZH3++BzQwcSUn664VY=" on: branch: master - condition: $TRAVIS_PYTHON_VERSION = "2.7" + condition: $TRAVIS_PYTHON_VERSION = "3.5" From 1e97bea81dfaabfeeff5e236a563cfa7787215a3 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 21:24:18 -0500 Subject: [PATCH 12/19] moved upgrade of setuptools into conda environment --- .travis.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5fd1380..e48595e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,13 +21,6 @@ cache: - $HOME/.cache/pyensembl/GRCm38/ensembl93/ timeout: 300 before_install: - - | - # possibly required to fix Travis for py3.5 but we should get rid - # of this when using py3.6+ - if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then - pip install setuptools --upgrade - pip install wrapt --upgrade - fi - | if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh; @@ -68,6 +61,13 @@ install: # install pysam from conda because I'm having trouble installing Cython # for Python 3 on Travis - conda install -c bioconda pysam=0.9.0 + - | + # required to fix Travis for py3.5 but we should get rid + # of this when using py3.6+ + if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then + pip install setuptools --upgrade + pip install wrapt --upgrade + fi - pip install -r requirements.txt - pip install . - pip install coveralls From 691fd1033c7691a96283c08cc16710c955b2f379 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 21:51:08 -0500 Subject: [PATCH 13/19] what if we put wrapt in the conda env --- .travis.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index e48595e..bdbc38b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,7 +56,7 @@ addons: install: - > conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION - numpy nose pylint cython + numpy nose pylint cython wrapt==1.11.2 - source activate test-environment # install pysam from conda because I'm having trouble installing Cython # for Python 3 on Travis @@ -66,7 +66,6 @@ install: # of this when using py3.6+ if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then pip install setuptools --upgrade - pip install wrapt --upgrade fi - pip install -r requirements.txt - pip install . From 78a59703bc09adbb4a0cff723bd71d90df74b7fa Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Wed, 12 Feb 2020 23:07:05 -0500 Subject: [PATCH 14/19] only setuptools in conda env? --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index bdbc38b..9d91430 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,7 +56,7 @@ addons: install: - > conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION - numpy nose pylint cython wrapt==1.11.2 + numpy nose pylint cython - source activate test-environment # install pysam from conda because I'm having trouble installing Cython # for Python 3 on Travis From 381ac62f262af3afd41a2f916201796545199881 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Thu, 13 Feb 2020 09:30:09 -0500 Subject: [PATCH 15/19] tried to move dependency to conda env --- .travis.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index 9d91430..9653cf5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -56,17 +56,11 @@ addons: install: - > conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION - numpy nose pylint cython + numpy nose pylint cython tensorflow tensorboard - source activate test-environment # install pysam from conda because I'm having trouble installing Cython # for Python 3 on Travis - conda install -c bioconda pysam=0.9.0 - - | - # required to fix Travis for py3.5 but we should get rid - # of this when using py3.6+ - if [[ "$TRAVIS_PYTHON_VERSION" == "3.5" ]]; then - pip install setuptools --upgrade - fi - pip install -r requirements.txt - pip install . - pip install coveralls From 4fceafe6f8b1462a6f80a76a79414e19c50a45d9 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Thu, 13 Feb 2020 09:47:42 -0500 Subject: [PATCH 16/19] disable pylint for ExcelWriter --- vaxrank/report.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/vaxrank/report.py b/vaxrank/report.py index 3cd3d68..9b4560f 100644 --- a/vaxrank/report.py +++ b/vaxrank/report.py @@ -10,6 +10,9 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pylint: disable=abstract-class-instantiated +# For more details see https://github.com/PyCQA/pylint/issues/3060 + from __future__ import absolute_import, division from collections import OrderedDict from importlib import import_module From ee26bb5ed0a68ff58c783d6f1c7b607dc255b21f Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Thu, 13 Feb 2020 09:48:53 -0500 Subject: [PATCH 17/19] re-enable testing on Py2.7 --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 9653cf5..24d7380 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ sudo: true language: python python: + - "2.7" - "3.5" git: # don't need the default depth of 50 From 995528885299a54c6c2d005a6ff461140edc7f61 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Thu, 13 Feb 2020 09:49:16 -0500 Subject: [PATCH 18/19] set PyPI version back to 2.7 --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 24d7380..2d0e18a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -94,4 +94,4 @@ deploy: secure: "kt07TyCxiumbu2SGZvl7J5LLzsLq0zxcCceOSGb4RE0tFOmiUmDv5LfGKME5lEFhPxleChaq/AILDOudEZO3H9xJY90/dXPgyXJOd6pCOzKq3zIKEjz3MEx3wBizcjR7ucHxYxDQzIBEBeoC4wTIc53mBR5gMgqyG8Dbciueg9Yo5qZED9rq/uY1ELxW2iQsS9LBK7n/blTPTisdmdG3g64eVEDsZD/0egPdupWhlkNuUQlA4LVr2KUifGrtXPL0A/m3CUEABJWre7PJ6motFS1KramP7/PQJwpXmZEHI1ayknNeN2ndMp4Ni64cEhtF/JIRYWBuGxzZXSuIv1g8XxKijm/qxSc4Gv0yIiP/Q3pZ5hfSC1RZhcpkc3ugCSOy5VwlF5O0P/EflMnsUE9ZRFON3s5+yjsJtNldsCRpdEsquFPn0BJC4kOdWKfIkYuhln6xcJawl+8uKlCJuOa1vZcsJ16E6RPN3TpkfChaMVBrD6zEbYZfrI5QCMV/uwkwutgV4H95FR0TlnDrOTwAMamxFBN7R8+YEHff9n21c4i7vzjBVlI16W/t2qqmaG/0w1hBhAiYvvV5BLh84km5gSewcjcKjBHVhYscF1Xxyq0CPmKdcOB6ePFbIWh/+NHQzDpzY8yKp9kzkZizZJGUAxsI2ZH3++BzQwcSUn664VY=" on: branch: master - condition: $TRAVIS_PYTHON_VERSION = "3.5" + condition: $TRAVIS_PYTHON_VERSION = "2.7" From ab5bf3942e0c5d36a3c94330ff61dbcb2165bf6a Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Thu, 13 Feb 2020 20:17:57 -0500 Subject: [PATCH 19/19] fixed nits --- test/test_epitope_prediction.py | 4 ++-- vaxrank/epitope_prediction.py | 2 +- vaxrank/mutant_protein_fragment.py | 1 - 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/test/test_epitope_prediction.py b/test/test_epitope_prediction.py index 7b0f285..72c03a5 100644 --- a/test/test_epitope_prediction.py +++ b/test/test_epitope_prediction.py @@ -102,5 +102,5 @@ def test_EpitopePrediction_json_serialization(): offset=1, occurs_in_reference=False) json = e.to_json() - e2 = EpitopePrediction.from_json((json)) - eq_(e, e2) \ No newline at end of file + e2 = EpitopePrediction.from_json(json) + eq_(e, e2) diff --git a/vaxrank/epitope_prediction.py b/vaxrank/epitope_prediction.py index aabba0c..c133e1e 100644 --- a/vaxrank/epitope_prediction.py +++ b/vaxrank/epitope_prediction.py @@ -59,7 +59,7 @@ def from_dict(cls, d): """ d = d.copy() if "length" in d: - # lenth argument removed in version 1.1.0 + # length argument removed in version 1.1.0 del d["length"] return cls(**d) diff --git a/vaxrank/mutant_protein_fragment.py b/vaxrank/mutant_protein_fragment.py index 170dce7..0b90c4b 100644 --- a/vaxrank/mutant_protein_fragment.py +++ b/vaxrank/mutant_protein_fragment.py @@ -52,7 +52,6 @@ def __init__( mutant_amino_acid_end_offset : int End offset of amino acids which differ due to the mutation - supporting_reference_transcripts : list of pyensembl.Transcript PyEnsembl Transcript objects for reference transcripts which were used to establish the reading frame of coding sequence(s)