small fixes to Travis, setup, and nucleotides helpers

openvax · Apr 28, 2019 · f85b789 · f85b789
1 parent 1449fe2
commit f85b789
Show file tree

Hide file tree

Showing 7 changed files with 34 additions and 32 deletions.
diff --git a/.coveragerc b/.coveragerc
@@ -2,5 +2,3 @@
 [run]
 omit = 
 	test/*
-	versioneer.py
-	varcode/_version.py
diff --git a/.travis.yml b/.travis.yml
@@ -18,7 +18,7 @@ cache:
     - $HOME/.cache/pyensembl/
     - $HOME/.cache/pyensembl/GRCh37/ensembl75/
     - $HOME/.cache/pyensembl/GRCh38/ensembl81/
-    - $HOME/.cache/pyensembl/GRCh38/ensembl87/
+    - $HOME/.cache/pyensembl/GRCh38/ensembl95/
 before_install:
   - |
     if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
@@ -62,20 +62,20 @@ before_script:
       --species human
       --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.81/
   - echo "Installed Ensembl 81" && df -h
-  # Ensembl 93
+  # Ensembl 95
   - >
       pyensembl install
       --release 95
       --species human
       --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCh38.95/
-  - echo "Installed Ensembl 93 for humans" && df -h
+  - echo "Installed Ensembl 95 for humans" && df -h
   # latest mouse release
   - >
       pyensembl install
       --release 95
       --species mouse
       --custom-mirror https://github.com/openvax/ensembl-data/releases/download/GRCm38.95/
-  - echo "Installed Ensembl 93 for mice" && df -h
+  - echo "Installed Ensembl 95 for mice" && df -h
 script:
   # now actually run the tests, generate a coverage report and run linter
   - nosetests test --with-coverage --cover-package=varcode

diff --git a/requirements.txt b/requirements.txt
@@ -1,10 +1,10 @@
 numpy>=1.7, <2.0
 pandas>=0.15
-pyensembl>=1.0.3
+pyensembl>=1.7.4
 biopython>=1.64
 pyvcf>=0.6.7
 memoized_property>=1.0.2
 nose>=1.3.3
 pylint>=1.4.4
-serializable>=0.0.8
-sercol>=0.1.0
+serializable>=0.1.0
+sercol>=0.1.4
diff --git a/setup.cfg b/setup.cfg
diff --git a/setup.py b/setup.py
@@ -63,12 +63,12 @@
         install_requires=[
             'numpy>=1.7, <2.0',
             'pandas>=0.15',
-            'pyensembl>=1.0.3',
+            'pyensembl>=1.7.4',
             'biopython>=1.64',
             'pyvcf>=0.6.7',
             'memoized_property>=1.0.2',
-            'serializable>=0.0.8',
-            'sercol>=0.1.0',
+            'serializable>=0.1.1',
+            'sercol>=0.1.4',
         ],
         entry_points={
             'console_scripts': [

diff --git a/varcode/__init__.py b/varcode/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2015-2018. Mount Sinai School of Medicine
+# Copyright (c) 2015-2019. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -24,7 +24,7 @@
     NonsilentCodingMutation,
 )
 
-__version__ = '0.9.0'
+__version__ = '0.9.1'
 
 __all__ = [
     # basic classes

diff --git a/varcode/nucleotides.py b/varcode/nucleotides.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2016. Mount Sinai School of Medicine
+# Copyright (c) 2016-2019. Mount Sinai School of Medicine
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,7 +16,7 @@
 
 import numpy as np
 
-import typechecks
+from six import string_types
 
 # include all pseudonucleotides encoding repeats and uncertain bases
 STANDARD_NUCLEOTIDES = {'A', 'C', 'T', 'G'}
@@ -49,18 +49,24 @@
     'N',  # any base
 }
 
+
 def is_purine(nucleotide, allow_extended_nucleotides=False):
     """Is the nucleotide a purine"""
     if not allow_extended_nucleotides and nucleotide not in STANDARD_NUCLEOTIDES:
-        raise ValueError("{} is a non-standard nucleotide, neither purine or pyrimidine".format(nucleotide))
+        raise ValueError(
+            "{} is a non-standard nucleotide, neither purine or pyrimidine".format(nucleotide))
     return nucleotide in PURINE_NUCLEOTIDES
 
 
 def all_standard_nucleotides(nucleotides):
     return all(base in STANDARD_NUCLEOTIDES for base in nucleotides)
 
 
-def normalize_nucleotide_string(nucleotides, allow_extended_nucleotides=False):
+def normalize_nucleotide_string(
+        nucleotides,
+        allow_extended_nucleotides=False,
+        empty_chars=".-",
+        treat_nan_as_empty=True):
     """
     Normalizes a nucleotide string by converting various ways of encoding empty
     strings into "", making all letters upper case, and checking to make sure
@@ -73,17 +79,21 @@ def normalize_nucleotide_string(nucleotides, allow_extended_nucleotides=False):
 
     allow_extended_nucleotides : bool
         Allow non-canonical nucleotide characters like 'X' for unknown base
+
+    empty_chars : str
+        Characters which encode empty strings, such as "." used in VCF format
+        or "-" used in MAF format
+
+    treat_nan_as_empty : bool
+        If True, a float NaN is meant to normalize to "" (some MAF files use NaN ref/alt for deletions/insertions)
     """
-    # some MAF files represent deletions/insertions with NaN ref/alt values
-    if isinstance(nucleotides, float) and np.isnan(nucleotides):
+    if nucleotides in empty_chars:
         return ""
-
-    # VCF files sometimes have '.' ref or alt for insertions and deletions, and
-    # MAF files sometimes have '-' ref or alt for insertions and deletions.
-    if nucleotides == "." or nucleotides == "-":
+    elif treat_nan_as_empty and isinstance(nucleotides, float) and np.isnan(nucleotides):
         return ""
-
-    typechecks.require_string(nucleotides, "nucleotide string")
+    elif type(nucleotides) not in string_types:
+        raise TypeError("Expected str for nucleotide string but got %s" % (
+            type(nucleotides),))
 
     nucleotides = nucleotides.upper()