Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Keep all variants, add gene set membership check #168

Merged
merged 12 commits into from
Sep 6, 2018
Merged
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -69,4 +69,4 @@ vaccine-peptides-report.xlsx
vaccine-peptides-report.json
vaccine-peptides.csv
neoepitope-report.xlsx

all_variants.csv
1 change: 1 addition & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
include LICENSE README.md
recursive-include vaxrank/data *
1 change: 1 addition & 0 deletions run-vaxrank-b16-test-data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ vaxrank \
--output-neoepitope-report neoepitope-report.xlsx \
--output-json-file vaccine-peptides-report.json \
--output-csv vaccine-peptides.csv \
--output-passing-variants-csv all_variants.csv \
--output-reviewed-by "John Doe,Jane Doe" \
--output-final-review "All the Does" \
--output-patient-id "Test Patient"
3 changes: 2 additions & 1 deletion test/data/b16.f10/b16.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
#chr pos id ref alt qual filter info
chr4 45802539 . G C . . .
chr9 82927102 . G T . . .
chr11 101177505 . T C . . .
chr13 5864876 . C CG . . .
chrX 8125624 . C A . . .
chrX 8125624 . C A . . .
19 changes: 14 additions & 5 deletions test/test_mutant_protein_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
from __future__ import absolute_import, print_function, division

from nose.tools import eq_, assert_almost_equal
from vaxrank.core_logic import ranked_vaccine_peptides
from vaxrank.core_logic import VaxrankCoreLogic
from mhctools import RandomBindingPredictor
from isovar.cli.variant_sequences_args import make_variant_sequences_arg_parser
from isovar.cli.rna_args import allele_reads_generator_from_args
from varcode.cli import variant_collection_from_args

from .testing_helpers import data_path

Expand Down Expand Up @@ -58,7 +59,9 @@ def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list, _ = ranked_vaccine_peptides(
variants = variant_collection_from_args(args)
core_logic = VaxrankCoreLogic(
variants=variants,
reads_generator=reads_generator,
mhc_predictor=random_binding_predictor,
vaccine_peptide_length=15,
Expand All @@ -67,6 +70,7 @@ def test_mutant_amino_acids_in_mm10_chrX_8125624_refC_altA_pS460I():
min_alt_rna_reads=1,
min_variant_sequence_coverage=1,
variant_sequence_assembly=True)
ranked_list = core_logic.ranked_vaccine_peptides()

for variant, vaccine_peptides in ranked_list:
eq_(
Expand All @@ -89,15 +93,18 @@ def test_mutant_amino_acids_in_mm10_chr9_82927102_refGT_altTG_pT441H():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)
ranked_list, _ = ranked_vaccine_peptides(
variants = variant_collection_from_args(args)
core_logic = VaxrankCoreLogic(
reads_generator=reads_generator,
mhc_predictor=random_binding_predictor,
variants=variants,
vaccine_peptide_length=15,
padding_around_mutation=5,
min_alt_rna_reads=1,
min_variant_sequence_coverage=1,
variant_sequence_assembly=True,
max_vaccine_peptides_per_variant=1)
ranked_list = core_logic.ranked_vaccine_peptides()

for variant, vaccine_peptides in ranked_list:
vaccine_peptide = vaccine_peptides[0]
Expand All @@ -113,18 +120,20 @@ def test_keep_top_k_epitopes():
"--bam", data_path("b16.f10/b16.combined.sorted.bam"),
])
reads_generator = allele_reads_generator_from_args(args)

variants = variant_collection_from_args(args)
keep_k_epitopes = 3
ranked_list, _ = ranked_vaccine_peptides(
core_logic = VaxrankCoreLogic(
reads_generator=reads_generator,
mhc_predictor=random_binding_predictor,
variants=variants,
vaccine_peptide_length=15,
padding_around_mutation=5,
min_alt_rna_reads=1,
min_variant_sequence_coverage=1,
variant_sequence_assembly=True,
max_vaccine_peptides_per_variant=1,
num_mutant_epitopes_to_keep=keep_k_epitopes)
ranked_list = core_logic.ranked_vaccine_peptides()

for variant, vaccine_peptides in ranked_list:
vaccine_peptide = vaccine_peptides[0]
Expand Down
18 changes: 17 additions & 1 deletion test/test_shell_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from mock import patch
from nose.plugins.attrib import attr
from tempfile import NamedTemporaryFile

import pandas as pd
from xlrd import open_workbook

from vaxrank.cli import main as run_shell_script
Expand Down Expand Up @@ -69,6 +71,20 @@ def test_csv_report():
assert len(lines) > 0


def test_all_variant_csv_report():
    """Check that --output-passing-variants-csv writes a usable CSV file.

    Runs the vaxrank command-line entry point on the B16 test data and
    verifies that the passing-variants CSV has at least one non-blank line
    and parses into a non-empty pandas DataFrame.
    """
    # The regular --output-csv report is also requested (presumably so the
    # run has a primary output target -- confirm against the CLI); its
    # contents are not inspected here. Writing it to a second temporary file
    # means it is cleaned up on exit instead of leaving a stray
    # "<name>ignored" file behind.
    with NamedTemporaryFile(mode="r") as f, \
            NamedTemporaryFile(mode="r") as ignored_csv:
        all_csv_args = cli_args_for_b16_seqdata + [
            "--output-passing-variants-csv", f.name,
            "--output-csv", ignored_csv.name,
        ]
        run_shell_script(all_csv_args)
        contents = f.read()
        # str.split always returns at least one element (even for an empty
        # file), so only count lines that actually contain something.
        lines = [line for line in contents.split("\n") if line.strip()]
        assert len(lines) > 0
        # make sure it can be a valid dataframe
        f.seek(0)
        df = pd.read_csv(f)
        assert len(df) > 0


def test_xlsx_report():
with NamedTemporaryFile(mode="r") as f:
xlsx_args = cli_args_for_b16_seqdata + ["--output-xlsx-report", f.name]
Expand All @@ -94,7 +110,7 @@ def test_pdf_report():
assert getsize(f.name) > 0


@patch('vaxrank.core_logic.vaccine_peptides_for_variant')
@patch('vaxrank.core_logic.VaxrankCoreLogic.vaccine_peptides_for_variant')
def test_report_no_peptides(mock_vaccine_peptides_for_variant):
# simulate case where we have no epitopes for any variant
mock_vaccine_peptides_for_variant.return_value = []
Expand Down
2 changes: 1 addition & 1 deletion vaxrank/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.8.5"
__version__ = "0.8.6"
31 changes: 25 additions & 6 deletions vaxrank/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,12 @@
mhc_binding_predictor_from_args,
)

import pandas as pd
import serializable
from varcode.cli import variant_collection_from_args

from .core_logic import ranked_vaccine_peptides
from .core_logic import VaxrankCoreLogic
from .gene_pathway_check import GenePathwayCheck
from .report import (
make_ascii_report,
make_html_report,
Expand Down Expand Up @@ -180,6 +182,11 @@ def add_output_args(arg_parser):
type=int,
help="Number of mutations to report")

output_args_group.add_argument(
"--output-passing-variants-csv",
julia326 marked this conversation as resolved.
Show resolved Hide resolved
default="",
help="Path to CSV file containing some metadata about every passing variant")


def add_vaccine_peptide_args(arg_parser):
vaccine_peptide_group = arg_parser.add_argument_group("Vaccine peptide options")
Expand Down Expand Up @@ -272,7 +279,8 @@ def ranked_variant_list_with_metadata(args):
reads_generator = allele_reads_generator_from_args(args)
mhc_predictor = mhc_binding_predictor_from_args(args)

ranked_list, variants_count_dict = ranked_vaccine_peptides(
core_logic = VaxrankCoreLogic(
variants=variants,
reads_generator=reads_generator,
mhc_predictor=mhc_predictor,
vaccine_peptide_length=args.vaccine_peptide_length,
Expand All @@ -282,21 +290,32 @@ def ranked_variant_list_with_metadata(args):
min_variant_sequence_coverage=args.min_variant_sequence_coverage,
min_epitope_score=args.min_epitope_score,
num_mutant_epitopes_to_keep=args.num_epitopes_per_peptide,
variant_sequence_assembly=args.variant_sequence_assembly)
variant_sequence_assembly=args.variant_sequence_assembly,
gene_pathway_check=GenePathwayCheck()
)

ranked_list_for_report = ranked_list[:args.max_mutations_in_report]
variants_count_dict = core_logic.variant_counts()
assert len(variants) == variants_count_dict['num_total_variants'], \
"Len(variants) is %d but variants_count_dict came back with %d" % (
len(variants), variants_count_dict['num_total_variants'])

if args.output_passing_variants_csv:
variant_metadata_dicts = core_logic.variant_properties()
df = pd.DataFrame(variant_metadata_dicts)
df.to_csv(args.output_passing_variants_csv, index=False)

ranked_list = core_logic.ranked_vaccine_peptides()
ranked_list_for_report = ranked_list[:args.max_mutations_in_report]
patient_info = PatientInfo(
patient_id=args.output_patient_id,
vcf_paths=variants.sources,
bam_path=args.bam,
mhc_alleles=mhc_alleles,
num_somatic_variants=len(variants),
num_somatic_variants=variants_count_dict['num_total_variants'],
num_coding_effect_variants=variants_count_dict['num_coding_effect_variants'],
num_variants_with_rna_support=variants_count_dict['num_variants_with_rna_support'],
num_variants_with_vaccine_peptides=variants_count_dict['num_variants_with_vaccine_peptides']
)

# return variants, patient info, and command-line args
data = {
'variants': ranked_list_for_report,
Expand Down
Loading