nmdp-bioinformatics · mmaiers-nmdp · Sep 9, 2020 · Sep 4, 2020 · Sep 4, 2020 · Sep 8, 2020
diff --git a/pyard/__init__.py b/pyard/__init__.py
@@ -25,4 +25,4 @@
 from .pyard import ARD
 
 __author__ = """NMDP Bioinformatics"""
-__version__ = '0.0.13'
+__version__ = '0.0.21'
diff --git a/pyard/pyard.py b/pyard/pyard.py
@@ -393,27 +393,40 @@ def redux(self, allele: str, ars_type: str) -> str:
         """
 
         # PERFORMANCE: precompiled regex
-        # dealing with leading HLA-
-
+        # dealing with leading 'HLA-'
         if self.HLA_regex.search(allele):
             hla, allele_name = allele.split("-")
             return "-".join(["HLA", self.redux(allele_name, ars_type)])
 
+        # Alleles ending with P or G are valid; strip the suffix before lookup
+        if allele.endswith(('P', 'G')):
+            allele = allele[:-1]
+
         if ars_type == "G" and allele in self._G:
             if allele in self.dup_g:
                 return self.dup_g[allele]
             else:
                 return self.G[allele]
-        elif ars_type == "lg" and allele in self._lg:
-            return self.lg[allele]
-        elif ars_type == "lgx" and allele in self._lgx:
-            return self.lgx[allele]
+        elif ars_type == "lg":
+            if allele in self._lg:
+                return self.lg[allele]
+            else:
+                # for 'lg' when allele is not in G group,
+                # return the allele's first 2 fields with a 'g' suffix
+                return ':'.join(allele.split(':')[0:2]) + 'g'
+        elif ars_type == "lgx":
+            if allele in self._lgx:
+                return self.lgx[allele]
+            else:
+                # for 'lgx' when allele is not in G group,
+                # return only the first 2 fields of the allele
+                return ':'.join(allele.split(':')[0:2])
         else:
             if self.remove_invalid:
                 if allele in self.valid:
                     return allele
                 else:
-                    return
+                    return ''
             else:
                 return allele
 
@@ -488,6 +501,14 @@ def isvalid(self, allele: str) -> bool:
         if not ismac(allele):
             # PERFORMANCE: use hash instead of allele in "list"
             # return allele in self.valid
+            # Alleles ending with P or G are valid
+            if allele.endswith(('P', 'G')):
+                # remove the last character
+                allele = allele[:-1]
+            # validate allele without the 'HLA-' prefix
+            if self.HLA_regex.search(allele):
+                # remove 'HLA-' prefix
+                allele = allele[4:]
             return self.valid_dict.get(allele, False)
         return True
 

diff --git a/pyard/smart_sort.py b/pyard/smart_sort.py
@@ -3,12 +3,13 @@
 
 expr_regex = re.compile('[NQLSGg]')
 
+
 @functools.lru_cache(maxsize=None)
 def smart_sort_comparator(a1, a2):
     """
     Natural sort 2 given alleles.
 
-    Python sorts strings lexographically but HLA alleles need
+    Python sorts strings lexicographically but HLA alleles need
     to be sorted by numerical values in each field of the HLA nomenclature.
 
     :param a1: first allele
@@ -19,85 +20,53 @@ def smart_sort_comparator(a1, a2):
     if a1 == a2:
         return 0
 
-
     # remove any non-numerics
     a1 = re.sub(expr_regex, '', a1)
     a2 = re.sub(expr_regex, '', a2)
+
+    # Check to see if they are still the same alleles
+    if a1 == a2:
+        return 0
+
     # Extract and Compare first fields first
-    a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
-    a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
+    a1_f1 = int(a1[a1.find('*') + 1:a1.find(':')])
+    a2_f1 = int(a2[a2.find('*') + 1:a2.find(':')])
 
     if a1_f1 < a2_f1:
         return -1
     if a1_f1 > a2_f1:
         return 1
 
-    # If the first fields are equal, try the 2nd fields
+    a1_fields = a1.split(':')
+    a2_fields = a2.split(':')
 
-    a1_f2 = int(a1.split(':')[1])
-    a2_f2 = int(a2.split(':')[1])
+    # If the first fields are equal, try the 2nd fields
+    a1_f2 = int(a1_fields[1])
+    a2_f2 = int(a2_fields[1])
 
     if a1_f2 < a2_f2:
         return -1
     if a1_f2 > a2_f2:
         return 1
 
-    # If the two fields are equal, try the 3rd fields
-
-    a1_f3 = int(a1.split(':')[2])
-    a2_f3 = int(a2.split(':')[2])
+    # If the second fields are equal, try the 3rd fields
+    a1_f3 = int(a1_fields[2])
+    a2_f3 = int(a2_fields[2])
 
     if a1_f3 < a2_f3:
         return -1
     if a1_f3 > a2_f3:
         return 1
 
-    # If the two fields are equal, try the 4th fields
-
-    a1_f4 = int(a1.split(':')[3])
-    a2_f3 = int(a2.split(':')[3])
+    # If the third fields are equal, try the 4th fields
+    a1_f4 = int(a1_fields[3])
+    a2_f4 = int(a2_fields[3])
 
     if a1_f4 < a2_f4:
         return -1
     if a1_f4 > a2_f4:
         return 1
 
-
-
-    # All fields are equal
+    # Alleles whose first 4 fields match are considered equal
     return 0
 
-def smart_sort_alleles(a1, a2):
-    """
-    Natural sort 2 given alleles.
-
-    Python sorts strings lexographically but HLA alleles need
-    to be sorted by numerical values in each field of the HLA nomenclature.
-
-    :param a1: first allele
-    :param a2: second allele
-    """
-    # Check to see if they are the same alleles
-    if a1 == a2:
-        return [a1, a2]
-
-    # Extract and Compare first fields first
-    a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
-    a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
-
-    if a1_f1 < a2_f1:
-        return [a1, a2]
-    if a1_f1 > a2_f1:
-        return [a2, a1]
-
-    # If the first fields are equal, try the 2nd fields
-    a1_f2 = int(a1[a1.find(':')+1:])
-    a2_f2 = int(a2[a2.find(':')+1:])
-
-    if a1_f2 < a2_f2:
-        return [a1, a2]
-    if a1_f2 > a2_f2:
-        return [a2, a1]
-
-    # All fields are equal
-    return [a1, a2]
diff --git a/setup.cfg b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.13
+current_version = 0.0.21
 commit = True
 tag = True
 

diff --git a/setup.py b/setup.py
@@ -42,7 +42,7 @@
 
 setup(
     name='py-ard',
-    version='0.0.18',
+    version='0.0.21',
     description="ARD reduction for HLA with python",
     long_description=readme + '\n\n' + history,
     author="CIBMTR",

diff --git a/tests/test_pyard.py b/tests/test_pyard.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-
 #
-#    pyars pyARS.
+#    py-ard pyARD.
 #    Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
 #
 #    This library is free software; you can redistribute it and/or modify it
@@ -24,75 +23,56 @@
 #
 
 """
-test_pyars
+test_pyard
 ----------------------------------
 
-Tests for `pyars` module.
+Tests for `py-ard` module.
 """
-import os
-import sys
 import json
+import os
 import unittest
 
 from pyard import ARD
 
 
-class TestPyard(unittest.TestCase):
+class TestPyArd(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.db_version = '3290'
+        cls.ard = ARD(cls.db_version, data_dir='/tmp/3290')
 
     def setUp(self):
-        self.ard = ARD(verbose=True)
-        self.data_dir = os.path.dirname(__file__)
         self.assertIsInstance(self.ard, ARD)
-        expected_json = self.data_dir + "/expected.json"
-        with open(expected_json) as json_data:
-            self.expected = json.load(json_data)
-        pass
-
-    def test_000_nomac(self):
-        self.ardnomac = ARD(download_mac=False)
-        self.assertIsInstance(self.ardnomac, ARD)
-        self.assertFalse(self.ardnomac.download_mac)
-        self.assertTrue(len(self.ardnomac.mac.keys()) == 0)
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'lg') == "A*01:01g")
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'lgx') == "A*01:01")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'G') == "HLA-A*01:01:01G")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'lg') == "HLA-A*01:01g")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'lgx') == "HLA-A*01:01")
-        pass
-
-    def test_001_dbversions(self):
-        for db in ['3310', '3300', '3290', '3280']:
-            self.arddb = ARD(dbversion=db, download_mac=False)
-            self.assertIsInstance(self.arddb, ARD)
-            self.assertFalse(self.arddb.download_mac)
-            self.assertTrue(self.arddb.dbversion == db)
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'G') == "A*01:01:01G")
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'lg') == "A*01:01g")
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'lgx') == "A*01:01")
-        pass
 
-    def test_002_remove_invalid(self):
-        self.assertTrue(self.ard.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        pass
-
-    def test_003_mac(self):
-        self.assertTrue(self.ard.redux_gl("A*01:AB", 'G') == "A*01:01:01G/A*01:02")
-        self.assertTrue(self.ard.redux_gl("HLA-A*01:AB", 'G') == "HLA-A*01:01:01G/HLA-A*01:02")
-        pass
-
-    def test_004_redux_gl(self):
-        for ex in self.expected['redux_gl']:
+    def test_no_mac(self):
+        self.ard_no_mac = ARD(self.db_version, data_dir='/tmp/3290', download_mac=False)
+        self.assertIsInstance(self.ard_no_mac, ARD)
+        self.assertEqual(len(self.ard_no_mac.mac.keys()), 0)
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'G'), "A*01:01:01G")
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'lg'), "A*01:01g")
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'lgx'), "A*01:01")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'G'), "HLA-A*01:01:01G")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'lg'), "HLA-A*01:01g")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'lgx'), "HLA-A*01:01")
+
+    def test_remove_invalid(self):
+        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")
+
+    def test_mac(self):
+        self.assertEqual(self.ard.redux_gl("A*01:AB", 'G'), "A*01:01:01G/A*01:02")
+        self.assertEqual(self.ard.redux_gl("HLA-A*01:AB", 'G'), "HLA-A*01:01:01G/HLA-A*01:02")
+
+    def test_redux_gl(self):
+        data_dir = os.path.dirname(__file__)
+        expected_json = data_dir + "/expected.json"
+        with open(expected_json) as json_data:
+            expected = json.load(json_data)
+        for ex in expected['redux_gl']:
             glstring = ex['glstring']
             ard_type = ex['ard_type']
             expected_gl = ex['expected_gl']
-            self.assertTrue(self.ard.redux_gl(glstring, ard_type) == expected_gl)
-        pass
-
-    def test_005_mac_G(self):
-        self.assertTrue(self.ard.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        pass
-
-
-
+            self.assertEqual(self.ard.redux_gl(glstring, ard_type), expected_gl)
 
+    def test_mac_G(self):
+        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")