From 573b80d86e1c0b52aed35fef4e3b0caba353d576 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 19 Jan 2021 11:35:50 -0600 Subject: [PATCH 1/5] Extract `is_XX` to its own method --- pyard/pyard.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index 3b62741..1d3233d 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -188,7 +188,7 @@ def redux_gl(self, glstring: str, redux_type: str) -> str: loc_antigen, code = loc_allele[0], loc_allele[1] # Handle XX codes - if self.is_mac(glstring) and code == "XX" and loc_antigen in self.xx_codes: + if self.is_XX(glstring, loc_antigen, code): return self.redux_gl("/".join(self.xx_codes[loc_antigen]), redux_type) # Handle MAC @@ -205,6 +205,12 @@ def redux_gl(self, glstring: str, redux_type: str) -> str: return self.redux(glstring, redux_type) + def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: + if loc_antigen is None or code is None: + loc_allele = glstring.split(":") + loc_antigen, code = loc_allele[0], loc_allele[1] + return self.is_mac(glstring) and code == "XX" and loc_antigen in self.xx_codes + @staticmethod def is_serology(allele: str) -> bool: """ @@ -248,7 +254,7 @@ def is_v2(allele: str) -> bool: :param allele: Possible allele :return: Is the allele in V2 nomenclature """ - return '*' in allele and not ':' in allele + return '*' in allele and ':' not in allele def _is_valid_allele(self, allele): """ From 0034f874461dded646ed3a0d1a16689c1f69cf1c Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 2 Feb 2021 17:18:24 -0600 Subject: [PATCH 2/5] For V2 alleles that are not in the exception list, use prediction heuristic to convert to V3. 
--- README.rst | 78 +++++++++----------------------------------- pyard/pyard.py | 53 +++++++++++++++++++++++++++++- scripts/pyard-import | 3 +- 3 files changed, 70 insertions(+), 64 deletions(-) diff --git a/README.rst b/README.rst index 0558caf..360c2ea 100644 --- a/README.rst +++ b/README.rst @@ -11,7 +11,7 @@ py-ard :alt: Documentation Status -ARD reduction for HLA with python +ARD reduction for HLA with Python * Free software: LGPL 3.0 * Documentation: https://pyard.readthedocs.io. @@ -34,85 +34,39 @@ Install from PyPi pip install py-ard -Testing -------- - -To run behavior-driven development (BDD) tests locally via the behave framework, -you'll need to set up a virtual environment. See Install from source - -.. code-block:: - - # Install test dependencies - pip install --upgrade pip - pip install -r test-requirements.txt - - # Running Behave and all BDD tests - behave Example ------- .. code-block:: python3 - import pyard - - # Initialize ARD object with a version of IMGT HLA database - ard = pyard.ARD(3290) - - # You can specify a data directory for temp files - # ard = pyard.ARD('3290', data_dir='/tmp/py-ard') + from pyard import ARD - # Initialize with latest IMGT HLA database - ard = pyard.ARD() + # Initialize ARD object + ard = ARD('3290') - # You can choose to refresh the MAC code for previously used versions - # ard = pyard.ARD(3290, refresh_mac=True) + # Initialize with latest DB + ard = ARD() - # Allele to reduce + # + # Reduce Allele + # allele = "A*01:01:01" ard.redux(allele, 'G') - # 'A*01:01:01G' + # >> 'A*01:01:01G' ard.redux(allele, 'lg') - # 'A*01:01g' + # >> 'A*01:01g' ard.redux(allele, 'lgx') # 'A*01:01' - ard.redux_gl("A*01:01/A*01:01N+A*02:AB^B*07:02+B*07:AB", "G") + # + # Reduce GL String + # + ard_gl = ard.redux_gl("A*01:01/A*01:01N+A*02:AB^B*07:02+B*07:AB", "G") + # >>> ard_gl # 'B*07:02:01G+B*07:02:01G^A*01:01:01G+A*02:01:01G/A*02:02' - # py-ard can also reduce serology based typings - ard.redux_gl('HLA-A*10^HLA-A*9', 'lg') - # 
'HLA-A*24:19g/HLA-A*24:22g^HLA-A*26:01g/HLA-A*26:10g/HLA-A*26:15g/HLA-A*26:92g/HLA-A*66:01g/HLA-A*66:03g' - - -Command Line Tools ------------------- - -.. code-block:: bash - - # Import the latest IMGT database - $ pyard-import - Created Latest py-ard database - - # Import particular version of IMGT database - $ pyard-import --import-db-version 3.29.0 - Created py-ard version 3290 database - - # Import particular version of IMGT database and - # replace the v2 to v3 mapping table - $ pyard-import --import-db-version 3.29.0 --v2-to-v3-mapping map2to3.csv - Created py-ard version 3290 database - Updated v2_mapping table with 'map2to3.csv' mapping file. - - # Reduce a gl string from command line - $ pyard --gl 'A*01:AB' -r lgx - A*01:01/A*01:02 - - $ pyard --gl 'DRB1*08:XX' -r G - DRB1*08:01:01G/DRB1*08:02:01G/DRB1*08:03:02G/DRB1*08:04:01G/DRB1*08:05/ ... - $ pyard -v 3290 --gl 'A1' -r lgx - A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ... \ No newline at end of file diff --git a/pyard/pyard.py b/pyard/pyard.py index 1d3233d..2d1f217 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -296,13 +296,53 @@ def _get_alleles_from_serology(self, serology) -> Iterable[str]: else: return alleles + def _combine_with_colon(self, digits_field): + num_of_digits = len(digits_field) + return ':'.join(digits_field[i:i + 2] for i in range(0, num_of_digits, 2)) + + def _predict_v3(self, v2_allele: str) -> str: + """ + Use heurestic to predict V3 from V2 + + :param v2_allele: Allele in V2 format + :return: V3 format of V2 allele + """ + # Separate out the locus and the allele name part + locus, allele_name = v2_allele.split('*') + # Separate out the numeric and non-numeric components + components = re.findall(r'^(\d+)(.*)', allele_name) + if not components: + return v2_allele + digits_field, non_digits_field = components.pop() + # final_allele is the result of the transformation + final_allele = digits_field + num_of_digits = len(digits_field) + if 
num_of_digits == 1: + return v2_allele + if num_of_digits > 2: + if locus.startswith('DP') and num_of_digits == 5: # covers DPs with 5 digits + final_allele = digits_field[:3] + ':' + (digits_field[3:]) + non_digits_field + elif num_of_digits % 2 == 0: # covers digits with 2, 4, 6, 8 + final_allele = self._combine_with_colon(digits_field) + non_digits_field + else: + final_allele = digits_field[:2] + ':' + (digits_field[2:]) + non_digits_field + else: + if non_digits_field: + final_allele = digits_field + ':' + non_digits_field + return locus + '*' + final_allele + def _map_v2_to_v3(self, v2_allele): """ Get V3 version of V2 versioned allele :param v2_allele: V2 versioned allele :return: V3 versioned allele """ - return v2_to_v3_allele(self.db_connection, v2_allele) + # Check if it's in the exception case mapping + v3_allele = v2_to_v3_allele(self.db_connection, v2_allele) + if not v3_allele: + # Try and predict V3 + v3_allele = self._predict_v3(v2_allele) + return v3_allele def isvalid(self, allele: str) -> bool: """ @@ -410,3 +450,14 @@ def expand_mac(self, mac_code: str): return list(self._get_alleles(code, locus_antigen)) return '' + + def v2_to_v3(self, v2_allele) -> str: + """ + Convert Version 2 Allele Name to Version 3 Allele Name + + :param v2_allele: Version 2 Allele Name + :return: Version 3 Allele Name + """ + if self.is_v2(v2_allele): + return self._map_v2_to_v3(v2_allele) + return v2_allele diff --git a/scripts/pyard-import b/scripts/pyard-import index b2b08b7..d0e99fd 100755 --- a/scripts/pyard-import +++ b/scripts/pyard-import @@ -92,6 +92,7 @@ if __name__ == '__main__': print(f"Created py-ard version {imgt_version} database") else: ard = pyard.ARD(data_dir=data_dir) + imgt_version = 'Latest' print(f"Created Latest py-ard database") del ard @@ -99,4 +100,4 @@ if __name__ == '__main__': db_connection = db.create_db_connection(data_dir, imgt_version, ro=False) db.save_dict(db_connection, table_name='v2_mapping', dictionary=v2_to_v3_dict, 
columns=('v2', 'v3')) - print(f"Updated v2_mapping table with '{args.v2_v3_mapping}' mapping file.") + print(f"Updated v2_mapping table with '{args.v2_v3_mapping}' mapping file for {imgt_version} IMGT database.") From 0a61c3462d1d513055681d319d2c9bce17932e41 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 2 Feb 2021 17:41:00 -0600 Subject: [PATCH 3/5] Update README --- README.rst | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 360c2ea..0f0db34 100644 --- a/README.rst +++ b/README.rst @@ -34,6 +34,20 @@ Install from PyPi pip install py-ard +Testing +------- + +To run behavior-driven development (BDD) tests locally via the behave framework, +you'll need to set up a virtual environment. See Install from source + +.. code-block:: + + # Install test dependencies + pip install --upgrade pip + pip install -r test-requirements.txt + + # Running Behave and all BDD tests + behave Example ------- @@ -54,13 +68,13 @@ Example allele = "A*01:01:01" ard.redux(allele, 'G') - # >> 'A*01:01:01G' + # >>> 'A*01:01:01G' ard.redux(allele, 'lg') - # >> 'A*01:01g' + # >>> 'A*01:01g' ard.redux(allele, 'lgx') - # 'A*01:01' + # >>> 'A*01:01' # # Reduce GL String @@ -69,4 +83,34 @@ Example # >>> ard_gl # 'B*07:02:01G+B*07:02:01G^A*01:01:01G+A*02:01:01G/A*02:02' +Command Line Tools +------------------ + +.. code-block:: bash + + # Import the latest IMGT database + $ pyard-import + Created Latest py-ard database + + # Import particular version of IMGT database + $ pyard-import --import-db-version 3.29.0 + Created py-ard version 3290 database + + # Import particular version of IMGT database and + # replace the v2 to v3 mapping table + $ pyard-import --import-db-version 3.29.0 --v2-to-v3-mapping map2to3.csv + Created py-ard version 3290 database + Updated v2_mapping table with 'map2to3.csv' mapping file. 
+ + # Replace the Latest IMGT database with V2 mappings + $ pyard-import --v2-to-v3-mapping map2to3.csv + + # Reduce a gl string from command line + $ pyard --gl 'A*01:AB' -r lgx + A*01:01/A*01:02 + + $ pyard --gl 'DRB1*08:XX' -r G + DRB1*08:01:01G/DRB1*08:02:01G/DRB1*08:03:02G/DRB1*08:04:01G/DRB1*08:05/ ... + $ pyard -v 3290 --gl 'A1' -r lgx + A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ... From 189156a4fb6a4781dc9b99974f2d73a420b44cb6 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 2 Feb 2021 17:47:29 -0600 Subject: [PATCH 4/5] Update README --- README.rst | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/README.rst b/README.rst index 0f0db34..8da42d9 100644 --- a/README.rst +++ b/README.rst @@ -54,17 +54,23 @@ Example .. code-block:: python3 - from pyard import ARD + import pyard - # Initialize ARD object - ard = ARD('3290') + # Initialize ARD object with a version of IMGT HLA database + ard = pyard.ARD(3290) - # Initialize with latest DB - ard = ARD() + # You can specify a data directory for temp files + # ard = pyard.ARD('3290', data_dir='/tmp/py-ard') - # + # Initialize with latest IMGT HLA database + ard = pyard.ARD() + + # You can choose to refresh the MAC code for previously used versions + # ard = pyard.ARD(3290, refresh_mac=True) + + # # Reduce Allele - # + # allele = "A*01:01:01" ard.redux(allele, 'G') @@ -76,13 +82,19 @@ Example ard.redux(allele, 'lgx') # >>> 'A*01:01' - # + # # Reduce GL String - # - ard_gl = ard.redux_gl("A*01:01/A*01:01N+A*02:AB^B*07:02+B*07:AB", "G") - # >>> ard_gl + # + ard.redux_gl("A*01:01/A*01:01N+A*02:AB^B*07:02+B*07:AB", "G") # 'B*07:02:01G+B*07:02:01G^A*01:01:01G+A*02:01:01G/A*02:02' + # py-ard can also reduce serology based typings + ard.redux_gl('HLA-A*10^HLA-A*9', 'lg') + # >>> ard_gl + # 'HLA-A*24:19g/HLA-A*24:22g^HLA-A*26:01g/HLA-A*26:10g/HLA-A*26:15g/HLA-A*26:92g/HLA-A*66:01g/HLA-A*66:03g' + + + Command Line Tools 
------------------ From 34e488856517be2142444812260a4006a20453d6 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 2 Feb 2021 17:50:58 -0600 Subject: [PATCH 5/5] Fix typo --- pyard/pyard.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyard/pyard.py b/pyard/pyard.py index 2d1f217..11a2277 100644 --- a/pyard/pyard.py +++ b/pyard/pyard.py @@ -302,7 +302,7 @@ def _combine_with_colon(self, digits_field): def _predict_v3(self, v2_allele: str) -> str: """ - Use heurestic to predict V3 from V2 + Use heuristic to predict V3 from V2 :param v2_allele: Allele in V2 format :return: V3 format of V2 allele