From 942dc3c035b16d48fdf3122e5d04be392a4fbe42 Mon Sep 17 00:00:00 2001
From: pbashyal-nmdp <pbashyal@nmdp.org>
Date: Fri, 4 Sep 2020 14:50:42 -0500
Subject: [PATCH 1/5] ars type of `lgx` and `lg` should return 2-fields when
 not in G group

Fixes #40
---
 pyard/pyard.py | 20 +++++++++++++++-----
 setup.py       |  2 +-
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/pyard/pyard.py b/pyard/pyard.py
index e8099be..a5be8ae 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -404,16 +404,26 @@ def redux(self, allele: str, ars_type: str) -> str:
                 return self.dup_g[allele]
             else:
                 return self.G[allele]
-        elif ars_type == "lg" and allele in self._lg:
-            return self.lg[allele]
-        elif ars_type == "lgx" and allele in self._lgx:
-            return self.lgx[allele]
+        elif ars_type == "lg":
+            if allele in self._lg:
+                return self.lg[allele]
+            else:
+                # for 'lg' when the allele is not in a G group,
+                # return only its first 2 fields with a 'g' suffix
+                return ':'.join(allele.split(':')[0:2]) + 'g'
+        elif ars_type == "lgx":
+            if allele in self._lgx:
+                return self.lgx[allele]
+            else:
+                # for 'lgx' when the allele is not in a G group,
+                # return only its first 2 fields
+                return ':'.join(allele.split(':')[0:2])
         else:
             if self.remove_invalid:
                 if allele in self.valid:
                     return allele
                 else:
-                    return
+                    return ''
             else:
                 return allele
 
diff --git a/setup.py b/setup.py
index bcc2640..6002b54 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
 
 setup(
     name='py-ard',
-    version='0.0.18',
+    version='0.0.19',
     description="ARD reduction for HLA with python",
     long_description=readme + '\n\n' + history,
     author="CIBMTR",

From ff34c71fe237c56cbffd647dac3657fcdd42435d Mon Sep 17 00:00:00 2001
From: pbashyal-nmdp <pbashyal@nmdp.org>
Date: Fri, 4 Sep 2020 16:00:42 -0500
Subject: [PATCH 2/5] Alleles with P and G groups are treated as valid alleles.

---
 pyard/pyard.py | 10 ++++++++--
 setup.py       |  2 +-
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/pyard/pyard.py b/pyard/pyard.py
index a5be8ae..af221e4 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -393,12 +393,15 @@ def redux(self, allele: str, ars_type: str) -> str:
         """
 
         # PERFORMANCE: precompiled regex
-        # dealing with leading HLA-
-
+        # dealing with leading 'HLA-'
         if self.HLA_regex.search(allele):
             hla, allele_name = allele.split("-")
             return "-".join(["HLA", self.redux(allele_name, ars_type)])
 
+        # Alleles ending with P or G are valid; strip the suffix before lookup
+        if allele.endswith(('P', 'G')):
+            allele = allele[:-1]
+
         if ars_type == "G" and allele in self._G:
             if allele in self.dup_g:
                 return self.dup_g[allele]
@@ -498,6 +501,9 @@ def isvalid(self, allele: str) -> bool:
         if not ismac(allele):
             # PERFORMANCE: use hash instead of allele in "list"
             # return allele in self.valid
+            # Alleles ending with P or G are valid
+            if allele.endswith(('P', 'G')):
+                allele = allele[:-1]
             return self.valid_dict.get(allele, False)
         return True
 
diff --git a/setup.py b/setup.py
index 6002b54..9b93bf7 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
 
 setup(
     name='py-ard',
-    version='0.0.19',
+    version='0.0.20',
     description="ARD reduction for HLA with python",
     long_description=readme + '\n\n' + history,
     author="CIBMTR",

From 6e75defb386386924bed2fa2987d1b87a06667ea Mon Sep 17 00:00:00 2001
From: pbashyal-nmdp <pbashyal@nmdp.org>
Date: Tue, 8 Sep 2020 10:17:33 -0500
Subject: [PATCH 3/5] Fix Tests  - Check allele validation without the `HLA-`
 prefix  - cleanup tests

 Fixes #25 and #26
---
 pyard/pyard.py      |  5 +++
 tests/test_pyard.py | 94 ++++++++++++++++++---------------------------
 2 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/pyard/pyard.py b/pyard/pyard.py
index af221e4..be23f42 100644
--- a/pyard/pyard.py
+++ b/pyard/pyard.py
@@ -503,7 +503,12 @@ def isvalid(self, allele: str) -> bool:
             # return allele in self.valid
             # Alleles ending with P or G are valid
             if allele.endswith(('P', 'G')):
+                # remove the last character
                 allele = allele[:-1]
+            # validate allele without the 'HLA-' prefix
+            if self.HLA_regex.search(allele):
+                # remove 'HLA-' prefix
+                allele = allele[4:]
             return self.valid_dict.get(allele, False)
         return True
 
diff --git a/tests/test_pyard.py b/tests/test_pyard.py
index b29fc48..5b665fa 100644
--- a/tests/test_pyard.py
+++ b/tests/test_pyard.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
-
 #
-#    pyars pyARS.
+#    py-ard pyARD.
 #    Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
 #
 #    This library is free software; you can redistribute it and/or modify it
@@ -24,75 +23,56 @@
 #
 
 """
-test_pyars
+test_pyard
 ----------------------------------
 
-Tests for `pyars` module.
+Tests for `py-ard` module.
 """
-import os
-import sys
 import json
+import os
 import unittest
 
 from pyard import ARD
 
 
-class TestPyard(unittest.TestCase):
+class TestPyArd(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls) -> None:
+        cls.db_version = '3290'
+        cls.ard = ARD(cls.db_version, data_dir='/tmp/3290')
 
     def setUp(self):
-        self.ard = ARD(verbose=True)
-        self.data_dir = os.path.dirname(__file__)
         self.assertIsInstance(self.ard, ARD)
-        expected_json = self.data_dir + "/expected.json"
-        with open(expected_json) as json_data:
-            self.expected = json.load(json_data)
-        pass
-
-    def test_000_nomac(self):
-        self.ardnomac = ARD(download_mac=False)
-        self.assertIsInstance(self.ardnomac, ARD)
-        self.assertFalse(self.ardnomac.download_mac)
-        self.assertTrue(len(self.ardnomac.mac.keys()) == 0)
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'lg') == "A*01:01g")
-        self.assertTrue(self.ardnomac.redux("A*01:01:01", 'lgx') == "A*01:01")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'G') == "HLA-A*01:01:01G")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'lg') == "HLA-A*01:01g")
-        self.assertTrue(self.ardnomac.redux("HLA-A*01:01:01", 'lgx') == "HLA-A*01:01")
-        pass
-
-    def test_001_dbversions(self):
-        for db in ['3310', '3300', '3290', '3280']:
-            self.arddb = ARD(dbversion=db, download_mac=False)
-            self.assertIsInstance(self.arddb, ARD)
-            self.assertFalse(self.arddb.download_mac)
-            self.assertTrue(self.arddb.dbversion == db)
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'G') == "A*01:01:01G")
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'lg') == "A*01:01g")
-            self.assertTrue(self.arddb.redux("A*01:01:01", 'lgx') == "A*01:01")
-        pass
 
-    def test_002_remove_invalid(self):
-        self.assertTrue(self.ard.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        pass
-
-    def test_003_mac(self):
-        self.assertTrue(self.ard.redux_gl("A*01:AB", 'G') == "A*01:01:01G/A*01:02")
-        self.assertTrue(self.ard.redux_gl("HLA-A*01:AB", 'G') == "HLA-A*01:01:01G/HLA-A*01:02")
-        pass
-
-    def test_004_redux_gl(self):
-        for ex in self.expected['redux_gl']:
+    def test_no_mac(self):
+        self.ard_no_mac = ARD(self.db_version, data_dir='/tmp/3290', download_mac=False)
+        self.assertIsInstance(self.ard_no_mac, ARD)
+        self.assertEqual(len(self.ard_no_mac.mac.keys()), 0)
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'G'), "A*01:01:01G")
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'lg'), "A*01:01g")
+        self.assertEqual(self.ard_no_mac.redux("A*01:01:01", 'lgx'), "A*01:01")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'G'), "HLA-A*01:01:01G")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'lg'), "HLA-A*01:01g")
+        self.assertEqual(self.ard_no_mac.redux("HLA-A*01:01:01", 'lgx'), "HLA-A*01:01")
+
+    def test_remove_invalid(self):
+        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")
+
+    def test_mac(self):
+        self.assertEqual(self.ard.redux_gl("A*01:AB", 'G'), "A*01:01:01G/A*01:02")
+        self.assertEqual(self.ard.redux_gl("HLA-A*01:AB", 'G'), "HLA-A*01:01:01G/HLA-A*01:02")
+
+    def test_redux_gl(self):
+        data_dir = os.path.dirname(__file__)
+        expected_json = data_dir + "/expected.json"
+        with open(expected_json) as json_data:
+            expected = json.load(json_data)
+        for ex in expected['redux_gl']:
             glstring = ex['glstring']
             ard_type = ex['ard_type']
             expected_gl = ex['expected_gl']
-            self.assertTrue(self.ard.redux_gl(glstring, ard_type) == expected_gl)
-        pass
-
-    def test_005_mac_G(self):
-        self.assertTrue(self.ard.redux("A*01:01:01", 'G') == "A*01:01:01G")
-        pass
-
-
-
+            self.assertEqual(self.ard.redux_gl(glstring, ard_type), expected_gl)
 
+    def test_mac_G(self):
+        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")

From 937469c47824d0a810d4c25f0f82165beda40727 Mon Sep 17 00:00:00 2001
From: pbashyal-nmdp <pbashyal@nmdp.org>
Date: Tue, 8 Sep 2020 12:31:31 -0500
Subject: [PATCH 4/5] Smart Sort Fix and cleanup

 - Fix 4th field comparison bug
 - Remove unused function `smart_sort_alleles`
 - cleanup
 - upped version to `0.0.21`
---
 pyard/smart_sort.py      | 73 ++++++++++----------------------
 setup.py                 |  2 +-
 tests/test_smart_sort.py | 89 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 53 deletions(-)
 create mode 100644 tests/test_smart_sort.py

diff --git a/pyard/smart_sort.py b/pyard/smart_sort.py
index 04f70db..0316826 100644
--- a/pyard/smart_sort.py
+++ b/pyard/smart_sort.py
@@ -3,12 +3,13 @@
 
 expr_regex = re.compile('[NQLSGg]')
 
+
 @functools.lru_cache(maxsize=None)
 def smart_sort_comparator(a1, a2):
     """
     Natural sort 2 given alleles.
 
-    Python sorts strings lexographically but HLA alleles need
+    Python sorts strings lexicographically but HLA alleles need
     to be sorted by numerical values in each field of the HLA nomenclature.
 
     :param a1: first allele
@@ -19,85 +20,53 @@ def smart_sort_comparator(a1, a2):
     if a1 == a2:
         return 0
 
-
     # remove any non-numerics
     a1 = re.sub(expr_regex, '', a1)
     a2 = re.sub(expr_regex, '', a2)
+
+    # Check to see if they are still the same alleles
+    if a1 == a2:
+        return 0
+
     # Extract and Compare first fields first
-    a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
-    a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
+    a1_f1 = int(a1[a1.find('*') + 1:a1.find(':')])
+    a2_f1 = int(a2[a2.find('*') + 1:a2.find(':')])
 
     if a1_f1 < a2_f1:
         return -1
     if a1_f1 > a2_f1:
         return 1
 
-    # If the first fields are equal, try the 2nd fields
+    a1_fields = a1.split(':')
+    a2_fields = a2.split(':')
 
-    a1_f2 = int(a1.split(':')[1])
-    a2_f2 = int(a2.split(':')[1])
+    # If the first fields are equal, try the 2nd fields
+    a1_f2 = int(a1_fields[1])
+    a2_f2 = int(a2_fields[1])
 
     if a1_f2 < a2_f2:
         return -1
     if a1_f2 > a2_f2:
         return 1
 
-    # If the two fields are equal, try the 3rd fields
-
-    a1_f3 = int(a1.split(':')[2])
-    a2_f3 = int(a2.split(':')[2])
+    # If the second fields are equal, try the 3rd fields
+    a1_f3 = int(a1_fields[2])
+    a2_f3 = int(a2_fields[2])
 
     if a1_f3 < a2_f3:
         return -1
     if a1_f3 > a2_f3:
         return 1
 
-    # If the two fields are equal, try the 4th fields
-
-    a1_f4 = int(a1.split(':')[3])
-    a2_f3 = int(a2.split(':')[3])
+    # If the third fields are equal, try the 4th fields
+    a1_f4 = int(a1_fields[3])
+    a2_f4 = int(a2_fields[3])
 
     if a1_f4 < a2_f4:
         return -1
     if a1_f4 > a2_f4:
         return 1
 
-
-    
-    # All fields are equal
+    # All fields are considered equal after 4th field
     return 0
 
-def smart_sort_alleles(a1, a2):
-    """
-    Natural sort 2 given alleles.
-
-    Python sorts strings lexographically but HLA alleles need
-    to be sorted by numerical values in each field of the HLA nomenclature.
-
-    :param a1: first allele
-    :param a2: second allele
-    """
-    # Check to see if they are the same alleles
-    if a1 == a2:
-        return [a1, a2]
-
-    # Extract and Compare first fields first
-    a1_f1 = int(a1[a1.find('*')+1:a1.find(':')])
-    a2_f1 = int(a2[a2.find('*')+1:a2.find(':')])
-
-    if a1_f1 < a2_f1:
-        return [a1, a2]
-    if a1_f1 > a2_f1:
-        return [a2, a1]
-
-    # If the first fields are equal, try the 2nd fields
-    a1_f2 = int(a1[a1.find(':')+1:])
-    a2_f2 = int(a2[a2.find(':')+1:])
-
-    if a1_f2 < a2_f2:
-        return [a1, a2]
-    if a1_f2 > a2_f2:
-        return [a2, a1]
-
-    # All fields are equal
-    return [a1, a2]
diff --git a/setup.py b/setup.py
index 9b93bf7..0671864 100644
--- a/setup.py
+++ b/setup.py
@@ -42,7 +42,7 @@
 
 setup(
     name='py-ard',
-    version='0.0.20',
+    version='0.0.21',
     description="ARD reduction for HLA with python",
     long_description=readme + '\n\n' + history,
     author="CIBMTR",
diff --git a/tests/test_smart_sort.py b/tests/test_smart_sort.py
new file mode 100644
index 0000000..ab9063c
--- /dev/null
+++ b/tests/test_smart_sort.py
@@ -0,0 +1,89 @@
+import unittest
+
+from pyard.smart_sort import smart_sort_comparator
+
+
+class TestSmartSort(unittest.TestCase):
+
+    def setUp(self) -> None:
+        super().setUp()
+
+    def test_same_comparator(self):
+        allele = "HLA-A*01:01"
+        self.assertEqual(smart_sort_comparator(allele, allele), 0)
+
+    def test_equal_comparator(self):
+        allele1 = "HLA-A*01:01"
+        allele2 = "HLA-A*01:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
+
+    def test_equal_comparator_G(self):
+        # Should compare without G
+        allele1 = "HLA-A*01:01G"
+        allele2 = "HLA-A*01:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
+
+    def test_equal_comparator_NG(self):
+        # Should compare without N and G
+        allele1 = "HLA-A*01:01G"
+        allele2 = "HLA-A*01:01N"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 0)
+
+    def test_first_field_comparator_le(self):
+        allele1 = "HLA-A*01:01"
+        allele2 = "HLA-A*02:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_first_field_comparator_ge(self):
+        allele1 = "HLA-A*02:01"
+        allele2 = "HLA-A*01:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
+
+    def test_second_field_comparator_le(self):
+        allele1 = "HLA-A*01:01"
+        allele2 = "HLA-A*01:02"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_second_field_comparator_le_smart(self):
+        allele1 = "HLA-A*01:29"
+        allele2 = "HLA-A*01:100"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_second_field_comparator_ge(self):
+        allele1 = "HLA-A*01:02"
+        allele2 = "HLA-A*01:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
+
+    def test_third_field_comparator_le(self):
+        allele1 = "HLA-A*01:01:01"
+        allele2 = "HLA-A*01:01:20"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_third_field_comparator_le_smart(self):
+        allele1 = "HLA-A*01:01:29"
+        allele2 = "HLA-A*01:01:100"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_third_field_comparator_ge(self):
+        allele1 = "HLA-A*01:01:02"
+        allele2 = "HLA-A*01:01:01"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
+
+    def test_fourth_field_comparator_le(self):
+        allele1 = "HLA-A*01:01:01:01"
+        allele2 = "HLA-A*01:01:01:20"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_fourth_field_comparator_le_smart(self):
+        allele1 = "HLA-A*01:01:01:39"
+        allele2 = "HLA-A*01:01:01:200"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), -1)
+
+    def test_fourth_field_comparator_ge(self):
+        allele1 = "HLA-A*01:01:01:30"
+        allele2 = "HLA-A*01:01:01:09"
+        self.assertEqual(smart_sort_comparator(allele1, allele2), 1)
+
+
+if __name__ == '__main__':
+    unittest.main()

From 279f679f4d95b554dec2fe1d3e2efc259d81ed0b Mon Sep 17 00:00:00 2001
From: pbashyal-nmdp <pbashyal@nmdp.org>
Date: Tue, 8 Sep 2020 15:01:44 -0500
Subject: [PATCH 5/5] Update version to 0.0.21

---
 pyard/__init__.py | 2 +-
 setup.cfg         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyard/__init__.py b/pyard/__init__.py
index 8b8afcb..cb7e264 100644
--- a/pyard/__init__.py
+++ b/pyard/__init__.py
@@ -25,4 +25,4 @@
 from .pyard import ARD
 
 __author__ = """NMDP Bioinformatics"""
-__version__ = '0.0.13'
+__version__ = '0.0.21'
diff --git a/setup.cfg b/setup.cfg
index a919b18..be5a281 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.0.13
+current_version = 0.0.21
 commit = True
 tag = True