From f85b7895d891fc714696c34112b9f711b5a224fb Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn Date: Sun, 28 Apr 2019 15:26:31 -0400 Subject: [PATCH] small fixes to Travis, setup, and nucleotides helpers --- .coveragerc | 2 -- .travis.yml | 8 ++++---- requirements.txt | 6 +++--- setup.cfg | 6 ------ setup.py | 6 +++--- varcode/__init__.py | 4 ++-- varcode/nucleotides.py | 34 ++++++++++++++++++++++------------ 7 files changed, 34 insertions(+), 32 deletions(-) delete mode 100644 setup.cfg diff --git a/.coveragerc b/.coveragerc index 9fa5e5a..7190fb9 100644 --- a/.coveragerc +++ b/.coveragerc @@ -2,5 +2,3 @@ [run] omit = test/* - versioneer.py - varcode/_version.py diff --git a/.travis.yml b/.travis.yml index af92d40..da5c885 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,7 +18,7 @@ cache: - $HOME/.cache/pyensembl/ - $HOME/.cache/pyensembl/GRCh37/ensembl75/ - $HOME/.cache/pyensembl/GRCh38/ensembl81/ - - $HOME/.cache/pyensembl/GRCh38/ensembl87/ + - $HOME/.cache/pyensembl/GRCh38/ensembl95/ before_install: - | if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then @@ -62,20 +62,20 @@ before_script: --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.81/ - echo "Installed Ensembl 81" && df -h - # Ensembl 93 + # Ensembl 95 - > pyensembl install --release 95 --species human --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.95/ - - echo "Installed Ensembl 93 for humans" && df -h + - echo "Installed Ensembl 95 for humans" && df -h # latest mouse release - > pyensembl install --release 95 --species mouse --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.95/ - - echo "Installed Ensembl 93 for mice" && df -h + - echo "Installed Ensembl 95 for mice" && df -h script: # now actually run the tests, generate a coverage report and run linter - nosetests test --with-coverage --cover-package=varcode diff --git a/requirements.txt b/requirements.txt index 71ae7aa..a5fe818 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ numpy>=1.7, <2.0 pandas>=0.15 -pyensembl>=1.0.3 +pyensembl>=1.7.4 biopython>=1.64 pyvcf>=0.6.7 memoized_property>=1.0.2 nose>=1.3.3 pylint>=1.4.4 -serializable>=0.0.8 -sercol>=0.1.0 \ No newline at end of file +serializable>=0.1.0 +sercol>=0.1.4 diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 0f41399..0000000 --- a/setup.cfg +++ /dev/null @@ -1,6 +0,0 @@ -[versioneer] -VCS = git -style = pep440 -versionfile_source = varcode/_version.py -versionfile_build = varcode/_version.py -tag_prefix = v diff --git a/setup.py b/setup.py index effa55a..36b72fb 100644 --- a/setup.py +++ b/setup.py @@ -63,12 +63,12 @@ install_requires=[ 'numpy>=1.7, <2.0', 'pandas>=0.15', - 'pyensembl>=1.0.3', + 'pyensembl>=1.7.4', 'biopython>=1.64', 'pyvcf>=0.6.7', 'memoized_property>=1.0.2', - 'serializable>=0.0.8', - 'sercol>=0.1.0', + 'serializable>=0.1.1', + 'sercol>=0.1.4', ], entry_points={ 'console_scripts': [ diff --git a/varcode/__init__.py b/varcode/__init__.py index e1e9186..05612c4 100644 --- a/varcode/__init__.py +++ b/varcode/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2015-2018. Mount Sinai School of Medicine +# Copyright (c) 2015-2019. Mount Sinai School of Medicine # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -24,7 +24,7 @@ NonsilentCodingMutation, ) -__version__ = '0.9.0' +__version__ = '0.9.1' __all__ = [ # basic classes diff --git a/varcode/nucleotides.py b/varcode/nucleotides.py index 746c5a9..bc0a331 100644 --- a/varcode/nucleotides.py +++ b/varcode/nucleotides.py @@ -1,4 +1,4 @@ -# Copyright (c) 2016. Mount Sinai School of Medicine +# Copyright (c) 2016-2019. Mount Sinai School of Medicine # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ import numpy as np -import typechecks +from six import string_types # include all pseudonucleotides encoding repeats and uncertain bases STANDARD_NUCLEOTIDES = {'A', 'C', 'T', 'G'} @@ -49,10 +49,12 @@ 'N', # any base } + def is_purine(nucleotide, allow_extended_nucleotides=False): """Is the nucleotide a purine""" if not allow_extended_nucleotides and nucleotide not in STANDARD_NUCLEOTIDES: - raise ValueError("{} is a non-standard nucleotide, neither purine or pyrimidine".format(nucleotide)) + raise ValueError( + "{} is a non-standard nucleotide, neither purine or pyrimidine".format(nucleotide)) return nucleotide in PURINE_NUCLEOTIDES @@ -60,7 +62,11 @@ def all_standard_nucleotides(nucleotides): return all(base in STANDARD_NUCLEOTIDES for base in nucleotides) -def normalize_nucleotide_string(nucleotides, allow_extended_nucleotides=False): +def normalize_nucleotide_string( + nucleotides, + allow_extended_nucleotides=False, + empty_chars=".-", + treat_nan_as_empty=True): """ Normalizes a nucleotide string by converting various ways of encoding empty strings into "", making all letters upper case, and checking to make sure @@ -73,17 +79,21 @@ def normalize_nucleotide_string(nucleotides, allow_extended_nucleotides=False): extended_nucleotides : bool Allow non-canonical nucleotide characters like 'X' for unknown base + + empty_chars : str + Characters which encode empty strings, such as "." used in VCF format + or "-" used in MAF format + + treat_nan_as_empty : bool + Some MAF files represent deletions/insertions with NaN ref/alt values """ - # some MAF files represent deletions/insertions with NaN ref/alt values - if isinstance(nucleotides, float) and np.isnan(nucleotides): + if nucleotides in empty_chars: return "" - - # VCF files sometimes have '.' ref or alt for insertions and deletions, and - # MAF files sometimes have '-' ref or alt for insertions and deletions. - if nucleotides == "." or nucleotides == "-": + elif treat_nan_as_empty and isinstance(nucleotides, float) and np.isnan(nucleotides): return "" - - typechecks.require_string(nucleotides, "nucleotide string") + elif type(nucleotides) not in string_types: + raise TypeError("Expected str for nucleotide string but got %s" % ( + type(nucleotides),)) nucleotides = nucleotides.upper()