def to_serological_name(locus_name: str) -> str:
    """
    Map a DNA locus/antigen name to its serological equivalent.
    http://hla.alleles.org/antigens/recognised_serology.html

    The serological locus is the first two characters of the DNA locus
    (single-letter loci are kept whole) followed by the antigen number.
    Eg:
      A*1 -> A1
      ...
      DRB5*51 -> DR51

    :param locus_name: DNA locus and antigen number joined by a single '*'
    :return: Serological equivalent
    :raises ValueError: if locus_name does not contain exactly one '*'
    """
    locus, separator, sero_number = locus_name.partition('*')
    # Fail with the offending value in the message instead of an anonymous
    # "not enough/too many values to unpack" error from tuple unpacking.
    if not separator or '*' in sero_number:
        raise ValueError(
            f"Expected '<locus>*<antigen>' but got {locus_name!r}")

    # 'DRB5' -> 'DR', 'DQB1' -> 'DQ', 'A' -> 'A'
    # NOTE(review): HLA-C antigens come out as 'C<n>' here, while the
    # nomenclature page linked above appears to list them as 'Cw<n>' -
    # confirm which spelling lookups are expected to use.
    sero_locus = locus[:2]
    return sero_locus + sero_number
@staticmethod
def is_serology(allele: str) -> bool:
    """
    Test whether a typing is written as a serological antigen.

    A serology has the locus name (first 2 letters for DRB1, DRB3, DQB1,
    DQA1, DPB1 and DPA1) of the allele followed by a numerical antigen,
    e.g. ``A10``, ``DR2``, ``DR1403``.  Anything containing ``*`` or ``:``
    is molecular nomenclature and is never serology.

    :param allele: The allele to test for serology
    :return: True if serology
    """
    if '*' in allele or ':' in allele:
        return False

    # The two-letter class II prefixes must be tried before the one-letter
    # loci: "DR4" is DR + "4", not D + "R4".
    if allele[:2] in ('DR', 'DP', 'DQ'):
        antigen = allele[2:]
    elif allele[:1] in ('A', 'B', 'C', 'D'):
        antigen = allele[1:]
    else:
        return False

    # str.isdigit() also accepts superscripts and other non-ASCII digit
    # characters; antigen numbers are plain ASCII, so check explicitly.
    return antigen != '' and all(ch in '0123456789' for ch in antigen)
b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.4.1 +current_version = 0.5.0 commit = True tag = True diff --git a/setup.py b/setup.py index 888fa90..827e6b2 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name='py-ard', - version='0.4.1', + version='0.5.0', description="ARD reduction for HLA with python", long_description=readme + '\n\n' + history, author="CIBMTR", diff --git a/tests/features/serology.feature b/tests/features/serology.feature index 296c3ba..8943c26 100644 --- a/tests/features/serology.feature +++ b/tests/features/serology.feature @@ -12,13 +12,9 @@ Feature: Serology Examples: Valid A serology typings | Serology | Level | Redux Allele | - | A*10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G | - | A*10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g | - | A*10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 | - - Examples: With HLA- prefix - | Serology | Level | Redux Allele | - | HLA-A*10 | G | HLA-A*26:01:01G/HLA-A*26:10/HLA-A*26:15/HLA-A*26:92/HLA-A*66:01:01G/HLA-A*66:03:01G | - | HLA-B*15:03 | G | HLA-B*15:03:01G | - | HLA-DQB1*1 | G | HLA-DQB1*06:11:01/HLA-DQB1*06:11:02/HLA-DQB1*06:11:03/HLA-DQB1*06:12 | - | HLA-DQB1*1 | lg | HLA-DQB1*06:11g/HLA-DQB1*06:12g | + | A10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G | + | A10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g | + | A10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 | + | A19 | G | A*02:65/A*33:09 | + | DR1403 | G | DRB1*14:03:01/DRB1*14:03:02 | + | DR2 | G | DRB1*15:08/DRB1*16:03 |