Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
from .pyard import ARD

__author__ = """NMDP Bioinformatics"""
__version__ = '0.4.1'
__version__ = '0.5.0'
26 changes: 24 additions & 2 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,23 @@ def generate_mac_codes(db_connection: sqlite3.Connection, refresh_mac: bool):
dictionary=mac, columns=('code', 'alleles'))


def to_serological_name(locus_name: str):
    """
    Convert a DNA-style locus name into its serological antigen name.

    The text before ``*`` is cut down to its first two characters and the
    text after ``*`` is appended to it unchanged.
    http://hla.alleles.org/antigens/recognised_serology.html
    Eg:
      A*1 -> A1
      ...
      DRB5*51 -> DR51

    :param locus_name: DNA Locus Name; must contain exactly one ``*``
    :return: Serological equivalent
    """
    # Unpacking into two names enforces the "exactly one '*'" shape.
    dna_locus, antigen_number = locus_name.split('*')
    return dna_locus[:2] + antigen_number


def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
if not db.table_exists(db_connection, 'serology_mapping'):
# Load WMDA serology mapping data
Expand Down Expand Up @@ -270,8 +287,13 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
sero_mapping_combined = pd.concat([usa[['Sero', 'Allele']],
psa[['Sero', 'Allele']],
asa[['Sero', 'Allele']]])
sero_mapping = sero_mapping_combined.groupby('Sero').\
apply(lambda x: '/'.join(sorted(x['Allele']))).\

# Map to only valid serological antigen name
sero_mapping_combined['Sero'] = sero_mapping_combined['Sero']. \
apply(to_serological_name)

sero_mapping = sero_mapping_combined.groupby('Sero'). \
apply(lambda x: '/'.join(sorted(x['Allele']))). \
to_dict()

# Save the serology mapping to db
Expand Down
4 changes: 2 additions & 2 deletions pyard/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def mac_code_to_alleles(connection: sqlite3.Connection, code: str) -> List[str]:
if result:
alleles = result[0].split('/')
else:
alleles = None
alleles = []
return alleles


Expand All @@ -98,7 +98,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
if result:
alleles = result[0].split('/')
else:
alleles = None
alleles = []
return alleles


Expand Down
48 changes: 33 additions & 15 deletions pyard/pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,20 +174,14 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:

# Handle Serology
if self.is_serology(glstring):
if HLA_regex.search(glstring):
# Remove HLA- prefix
serology = glstring.split("-")[1]
alleles = self._get_alleles_from_serology(serology)
alleles = ['HLA-' + a for a in alleles]
else:
alleles = self._get_alleles_from_serology(glstring)
alleles = self._get_alleles_from_serology(glstring)
return self.redux_gl("/".join(alleles), redux_type)

loc_allele = glstring.split(":")
loc_name, code = loc_allele[0], loc_allele[1]

# Handle XX codes
if (self.is_mac(glstring) and glstring.split(":")[1] == "XX") and loc_name in self.xx_codes:
if self.is_mac(glstring) and code == "XX" and loc_name in self.xx_codes:
return self.redux_gl("/".join(self.xx_codes[loc_name]), redux_type)

# Handle MAC
Expand All @@ -207,11 +201,26 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
@staticmethod
def is_serology(allele: str) -> bool:
    """
    Test whether a typing is written as a serological antigen.

    A serology has the locus name (first 2 letters for DRB1, DRB3, DQB1, DQA1, DPB1 and DPA1)
    of the allele followed by numerical antigen.

    :param allele: The allele to test for serology
    :return: True if serology
    """
    # Anything carrying '*' or ':' is a DNA-style name, never serology.
    if '*' in allele or ':' in allele:
        return False

    # Two-letter class II prefixes are checked first so that e.g. 'DR15'
    # is not misread as locus 'D' followed by the non-numeric 'R15'.
    if allele[:2] in ('DR', 'DP', 'DQ'):
        return allele[2:].isdigit()

    # Single-letter loci; an empty remainder is not a valid antigen
    # because ''.isdigit() is False.
    if allele[:1] in ('A', 'B', 'C', 'D'):
        return allele[1:].isdigit()

    return False

@staticmethod
def is_mac(gl: str) -> bool:
Expand All @@ -229,7 +238,9 @@ def _is_valid_allele(self, allele):
:param allele: Allele to test
:return: bool to indicate if allele is valid
"""
return allele in self.valid_alleles
if self._remove_invalid:
return allele in self.valid_alleles
return True

def _get_alleles(self, code, loc_name) -> Iterable[str]:
"""
Expand All @@ -239,12 +250,18 @@ def _get_alleles(self, code, loc_name) -> Iterable[str]:
:return: valid alleles corresponding to allele code
"""
alleles = mac_code_to_alleles(self.db_connection, code)
return filter(self._is_valid_allele,
[f'{loc_name}:{a}' for a in alleles])
if self._remove_invalid:
return filter(self._is_valid_allele,
[f'{loc_name}:{a}' for a in alleles])
else:
return [f'{loc_name}:{a}' for a in alleles]

def _get_alleles_from_serology(self, serology) -> Iterable[str]:
    """
    Look up the alleles recorded for a serology name.

    :param serology: name passed to ``db.serology_to_alleles`` for lookup
    :return: the alleles returned by the lookup; when ``self._remove_invalid``
        is set, only those accepted by ``self._is_valid_allele`` (lazily
        filtered)
    """
    alleles = db.serology_to_alleles(self.db_connection, serology)
    # Filtering is opt-in: without the flag the lookup result is passed
    # through untouched.
    if self._remove_invalid:
        return filter(self._is_valid_allele, alleles)
    return alleles

def isvalid(self, allele: str) -> bool:
"""
Expand All @@ -255,6 +272,8 @@ def isvalid(self, allele: str) -> bool:
:return: allele or empty
:rtype: bool
"""
if allele == '':
return False
if not self.is_mac(allele) and not self.is_serology(allele):
# Alleles ending with P or G are valid_alleles
if allele.endswith(('P', 'G')):
Expand Down Expand Up @@ -310,7 +329,6 @@ def mac_toG(self, allele: str) -> str:
return ''
else:
return "/".join(group)

else:
return ''

Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.4.1
current_version = 0.5.0
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name='py-ard',
version='0.4.1',
version='0.5.0',
description="ARD reduction for HLA with python",
long_description=readme + '\n\n' + history,
author="CIBMTR",
Expand Down
16 changes: 6 additions & 10 deletions tests/features/serology.feature
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,9 @@ Feature: Serology

Examples: Valid A serology typings
| Serology | Level | Redux Allele |
| A*10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G |
| A*10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g |
| A*10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 |

Examples: With HLA- prefix
| Serology | Level | Redux Allele |
| HLA-A*10 | G | HLA-A*26:01:01G/HLA-A*26:10/HLA-A*26:15/HLA-A*26:92/HLA-A*66:01:01G/HLA-A*66:03:01G |
| HLA-B*15:03 | G | HLA-B*15:03:01G |
| HLA-DQB1*1 | G | HLA-DQB1*06:11:01/HLA-DQB1*06:11:02/HLA-DQB1*06:11:03/HLA-DQB1*06:12 |
| HLA-DQB1*1 | lg | HLA-DQB1*06:11g/HLA-DQB1*06:12g |
| A10 | G | A*26:01:01G/A*26:10/A*26:15/A*26:92/A*66:01:01G/A*66:03:01G |
| A10 | lg | A*26:01g/A*26:10g/A*26:15g/A*26:92g/A*66:01g/A*66:03g |
| A10 | lgx | A*26:01/A*26:10/A*26:15/A*26:92/A*66:01/A*66:03 |
| A19 | G | A*02:65/A*33:09 |
| DR1403 | G | DRB1*14:03:01/DRB1*14:03:02 |
| DR2 | G | DRB1*15:08/DRB1*16:03 |