Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,31 @@ Example
# 'HLA-A*24:19g/HLA-A*24:22g^HLA-A*26:01g/HLA-A*26:10g/HLA-A*26:15g/HLA-A*26:92g/HLA-A*66:01g/HLA-A*66:03g'


Command Line Tools
------------------

.. code-block:: bash

# Import the latest IMGT database
$ pyard-import
Created Latest py-ard database

# Import a particular version of the IMGT database
$ pyard-import --import-db-version 3.29.0
Created py-ard version 3290 database

# Import a particular version of the IMGT database and
# replace the v2 to v3 mapping table
$ pyard-import --import-db-version 3.29.0 --v2-to-v3-mapping map2to3.csv
Created py-ard version 3290 database
Updated v2_mapping table with 'map2to3.csv' mapping file.

# Reduce a GL string from the command line
$ pyard --gl 'A*01:AB' -r lgx
A*01:01/A*01:02

$ pyard --gl 'DRB1*08:XX' -r G
DRB1*08:01:01G/DRB1*08:02:01G/DRB1*08:03:02G/DRB1*08:04:01G/DRB1*08:05/ ...

$ pyard -v 3290 --gl 'A1' -r lgx
A*01:01/A*01:02/A*01:03/A*01:06/A*01:07/A*01:08/A*01:09/A*01:10/A*01:12/ ...
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,4 @@
from .pyard import ARD

__author__ = """NMDP Bioinformatics"""
__version__ = '0.5.1'
__version__ = '0.6.0'
44 changes: 44 additions & 0 deletions pyard/data_repository.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# -*- coding: utf-8 -*-
#
# py-ard
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import functools
import sqlite3

Expand Down Expand Up @@ -299,3 +321,25 @@ def generate_serology_mapping(db_connection: sqlite3.Connection, imgt_version):
# Save the serology mapping to db
db.save_dict(db_connection, table_name='serology_mapping',
dictionary=sero_mapping, columns=('serology', 'allele_list'))


def generate_v2_to_v3_mapping(db_connection: sqlite3.Connection, imgt_version):
    """
    Create the 'v2_mapping' table of V2 to V3 allele names if it is missing.

    When the table already exists nothing is written.

    :param db_connection: db connection of type sqlite3.Connection
    :param imgt_version: IMGT database version; not used by this
        placeholder implementation
    """
    mapping_table = 'v2_mapping'
    if db.table_exists(db_connection, mapping_table):
        # Table is already present; leave its contents untouched
        return

    # TODO: Create mapping table using both the allele list history and
    #  deleted alleles as reference.
    # Temporary example rows until the real mapping is generated
    example_rows = {
        "A*0104": "A*01:04N",
        "A*0105N": "A*01:04N",
        "A*0111": "A*01:11N",
        "A*01123": "A*01:123N",
        "A*0115": "A*01:15N",
        "A*0116": "A*01:16N",
        "A*01160": "A*01:160N",
        "A*01162": "A*01:162N",
        "A*01178": "A*01:178N",
        "A*01179": "A*01:179N",
        "DRB5*02ZB": "DRB5*02:UTV",
    }
    db.save_dict(
        db_connection,
        table_name=mapping_table,
        dictionary=example_rows,
        columns=('v2', 'v3'),
    )
43 changes: 41 additions & 2 deletions pyard/db.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# -*- coding: utf-8 -*-
#
# py-ard
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import pathlib
import sqlite3
from typing import Tuple, Dict, Set, List
Expand Down Expand Up @@ -92,7 +114,7 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
:return: List of alleles
"""
serology_query = "SELECT allele_list from serology_mapping where serology = ?"
cursor = connection.execute(serology_query, (serology, ))
cursor = connection.execute(serology_query, (serology,))
result = cursor.fetchone()
cursor.close()
if result:
Expand All @@ -102,6 +124,23 @@ def serology_to_alleles(connection: sqlite3.Connection, serology: str) -> List[s
return alleles


def v2_to_v3_allele(connection: sqlite3.Connection, v2_allele: str) -> str:
    """
    Look up the V3 version of a V2 allele in the database.

    :param connection: db connection of type sqlite3.Connection
    :param v2_allele: allele name in V2 nomenclature, e.g. 'A*0104'
    :return: the V3 allele stored for v2_allele, or an empty string when
        the 'v2_mapping' table has no row for it
    """
    v2_query = "SELECT v3 from v2_mapping where v2 = ?"
    cursor = connection.execute(v2_query, (v2_allele,))
    try:
        result = cursor.fetchone()
    finally:
        # Release the cursor even if fetching the row fails
        cursor.close()
    return result[0] if result else ''


def is_valid_mac_code(connection: sqlite3.Connection, code: str) -> bool:
"""
Check db if the MAC code exists.
Expand Down Expand Up @@ -215,4 +254,4 @@ def load_dict(connection: sqlite3.Connection, table_name: str, columns: Tuple[st
cursor.execute(select_all_query)
table_as_dict = {k: v for k, v in cursor.fetchall()}
cursor.close()
return table_as_dict
return table_as_dict
40 changes: 34 additions & 6 deletions pyard/pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@

from . import db
from .data_repository import generate_ars_mapping, generate_mac_codes, generate_alleles_and_xx_codes, \
generate_serology_mapping
from .db import is_valid_mac_code, mac_code_to_alleles
generate_serology_mapping, generate_v2_to_v3_mapping
from .db import is_valid_mac_code, mac_code_to_alleles, v2_to_v3_allele
from .smart_sort import smart_sort_comparator

HLA_regex = re.compile("^HLA-")
Expand Down Expand Up @@ -66,6 +66,8 @@ def __init__(self, imgt_version: str = 'Latest',
self.dup_g, self._G, self._lg, self._lgx = generate_ars_mapping(self.db_connection, imgt_version)
# Load Serology mappings
generate_serology_mapping(self.db_connection, imgt_version)
# Load V2 to V3 mappings
generate_v2_to_v3_mapping(self.db_connection, imgt_version)

# Close the current read-write db connection
self.db_connection.close()
Expand Down Expand Up @@ -172,6 +174,11 @@ def redux_gl(self, glstring: str, redux_type: str) -> str:
return "/".join(sorted(set([self.redux_gl(a, redux_type) for a in glstring.split("/")]),
key=functools.cmp_to_key(smart_sort_comparator)))

# Handle V2 to V3 mapping
if self.is_v2(glstring):
glstring = self._map_v2_to_v3(glstring)
return self.redux_gl(glstring, redux_type)

# Handle Serology
if self.is_serology(glstring):
alleles = self._get_alleles_from_serology(glstring)
Expand Down Expand Up @@ -232,6 +239,17 @@ def is_mac(gl: str) -> bool:
"""
return re.search(r":\D+", gl) is not None

@staticmethod
def is_v2(allele: str) -> bool:
    """
    Check whether an allele name looks like Version 2 nomenclature.

    Version 2 of the nomenclature is a single field.
    It does not have any ':' field separator.
    Eg: A*0104

    The check is purely textual: the name must contain a '*' and
    must not contain a ':'.

    :param allele: Possible allele
    :return: Is the allele in V2 nomenclature
    """
    return '*' in allele and ':' not in allele

def _is_valid_allele(self, allele):
"""
Test if allele is valid in the current imgt database
Expand All @@ -255,7 +273,7 @@ def _get_alleles(self, code, locus_antigen) -> Iterable[str]:
# else it's a group expansion
is_allelic_expansion = any([':' in allele for allele in alleles])
if is_allelic_expansion:
locus = locus_antigen.split('*')[0] # Just keep the locus name
locus = locus_antigen.split('*')[0] # Just keep the locus name
alleles = [f'{locus}*{a}' for a in alleles]
else:
alleles = [f'{locus_antigen}:{a}' for a in alleles]
Expand All @@ -272,6 +290,14 @@ def _get_alleles_from_serology(self, serology) -> Iterable[str]:
else:
return alleles

def _map_v2_to_v3(self, v2_allele):
    """
    Translate a V2 versioned allele into its V3 version.

    Delegates to the v2_to_v3_allele lookup using this object's
    database connection.

    :param v2_allele: V2 versioned allele
    :return: the result of the v2_to_v3_allele lookup for v2_allele
    """
    connection = self.db_connection
    v3_allele = v2_to_v3_allele(connection, v2_allele)
    return v3_allele

def isvalid(self, allele: str) -> bool:
"""
Determines validity of an allele
Expand All @@ -283,7 +309,9 @@ def isvalid(self, allele: str) -> bool:
"""
if allele == '':
return False
if not self.is_mac(allele) and not self.is_serology(allele):
if not self.is_mac(allele) and \
not self.is_serology(allele) and \
not self.is_v2(allele):
# Alleles ending with P or G are valid_alleles
if allele.endswith(('P', 'G')):
# remove the last character
Expand Down Expand Up @@ -330,7 +358,7 @@ def mac_toG(self, allele: str) -> str:
"""
locus_antigen, code = allele.split(":")
if HLA_regex.search(allele):
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
if is_valid_mac_code(self.db_connection, code):
alleles = self._get_alleles(code, locus_antigen)
group = [self.toG(a) for a in alleles]
Expand Down Expand Up @@ -370,7 +398,7 @@ def expand_mac(self, mac_code: str):
locus_antigen, code = mac_code.split(":")
if is_valid_mac_code(self.db_connection, code):
if HLA_regex.search(mac_code):
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix
return ['HLA-' + a for a in self._get_alleles(code, locus_antigen)]
else:
return list(self._get_alleles(code, locus_antigen))
Expand Down
9 changes: 8 additions & 1 deletion pyard/smart_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import re

expr_regex = re.compile('[NQLSGg]')
glstring_chars = re.compile('[/|+^~]')


@functools.lru_cache(maxsize=1000)
Expand All @@ -43,6 +44,13 @@ def smart_sort_comparator(a1, a2):
if a1 == a2:
return 0

# GL String matches
if re.search(glstring_chars, a1) or re.search(glstring_chars, a2):
if a1 > a2:
return 1
else:
return -1

# remove any non-numerics
a1 = re.sub(expr_regex, '', a1)
a2 = re.sub(expr_regex, '', a2)
Expand Down Expand Up @@ -92,4 +100,3 @@ def smart_sort_comparator(a1, a2):

# All fields are considered equal after 4th field
return 0

69 changes: 69 additions & 0 deletions scripts/pyard
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# py-ard
# Copyright (c) 2020 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation; either version 3 of the License, or (at
# your option) any later version.
#
# This library is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this library; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
#
# > http://www.fsf.org/licensing/licenses/lgpl.html
# > http://www.opensource.org/licenses/lgpl-license.php
#
import argparse

import pyard


def get_imgt_version(imgt_version):
    """
    Normalize an IMGT database version given on the command line.

    Dots are removed, so '3.29.0' becomes '3290'.

    :param imgt_version: version string such as '3.29.0' or '3290';
        may be None or empty when no version was requested
    :return: the version with dots removed, or None when imgt_version
        is None or empty
    :raises RuntimeError: if anything other than the digits 0-9 remains
        after removing the dots, or nothing remains at all
    """
    if not imgt_version:
        return None

    version = imgt_version.replace('.', '')
    # str.isdigit() also accepts superscripts and other non-ASCII digit
    # characters, which are not valid in a version number, so compare
    # against 0-9 explicitly.
    if version and all(ch in '0123456789' for ch in version):
        return version
    raise RuntimeError(f"{imgt_version} is not a valid IMGT database version number")


if __name__ == '__main__':
    # Command line entry point: reduce a single GL string and print it.
    # No custom usage string is supplied so that argparse generates one
    # that matches the options actually defined below.
    parser = argparse.ArgumentParser(
        description="""py-ard tool to redux GL String"""
    )
    parser.add_argument(
        "-v",
        "--imgt-version",
        dest="imgt_version",
        help="IMGT database version, e.g. 3.29.0 or 3290 "
             "(the py-ard default is used when omitted)"
    )
    parser.add_argument(
        "--gl",
        required=True,
        dest="gl_string",
        help="GL string to reduce, e.g. 'A*01:AB'"
    )
    parser.add_argument(
        "-r",
        choices=['G', 'lg', 'lgx'],
        required=True,
        dest="redux_type",
        help="reduction type"
    )

    args = parser.parse_args()

    # Raises RuntimeError when the supplied version is not numeric
    imgt_version = get_imgt_version(args.imgt_version)
    if imgt_version:
        ard = pyard.ARD(imgt_version)
    else:
        # No version requested: let ARD pick its own default
        ard = pyard.ARD()

    print(ard.redux_gl(args.gl_string, args.redux_type))
    # NOTE(review): presumably drops the reference so ARD can release its
    # database connection - confirm against the ARD class.
    del ard
Loading