Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyard/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@
from .pyard import ARD

__author__ = """NMDP Bioinformatics"""
__version__ = '0.0.13'
__version__ = '0.0.21'
35 changes: 28 additions & 7 deletions pyard/pyard.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,27 +393,40 @@ def redux(self, allele: str, ars_type: str) -> str:
"""

# PERFORMANCE: precompiled regex
# dealing with leading HLA-

# dealing with leading 'HLA-'
if self.HLA_regex.search(allele):
hla, allele_name = allele.split("-")
return "-".join(["HLA", self.redux(allele_name, ars_type)])

# Alleles ending with P or G are valid
if allele.endswith(('P', 'G')):
allele = allele[:-1]

if ars_type == "G" and allele in self._G:
if allele in self.dup_g:
return self.dup_g[allele]
else:
return self.G[allele]
elif ars_type == "lg" and allele in self._lg:
return self.lg[allele]
elif ars_type == "lgx" and allele in self._lgx:
return self.lgx[allele]
elif ars_type == "lg":
if allele in self._lg:
return self.lg[allele]
else:
# for 'lg', when the allele is not in a G group,
# return the allele with only its first 2 fields
return ':'.join(allele.split(':')[0:2]) + 'g'
elif ars_type == "lgx":
if allele in self._lgx:
return self.lgx[allele]
else:
# for 'lgx', when the allele is not in a G group,
# return the allele with only its first 2 fields
return ':'.join(allele.split(':')[0:2])
else:
if self.remove_invalid:
if allele in self.valid:
return allele
else:
return
return ''
else:
return allele

Expand Down Expand Up @@ -488,6 +501,14 @@ def isvalid(self, allele: str) -> bool:
if not ismac(allele):
# PERFORMANCE: use hash instead of allele in "list"
# return allele in self.valid
# Alleles ending with P or G are valid
if allele.endswith(('P', 'G')):
# remove the last character
allele = allele[:-1]
# validate allele without the 'HLA-' prefix
if self.HLA_regex.search(allele):
# remove 'HLA-' prefix
allele = allele[4:]
return self.valid_dict.get(allele, False)
return True

Expand Down
73 changes: 21 additions & 52 deletions pyard/smart_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@

# Expression/group suffix characters that are removed before fields are
# compared numerically.
expr_regex = re.compile('[NQLSGg]')


def _leading_field_numbers(allele, max_fields=4):
    """
    Return the integer values of the first ``max_fields`` fields of an allele.

    Everything up to and including ``*`` is ignored; the remainder is split
    on ``:`` and each of the leading fields is converted with ``int``.

    :param allele: allele name with expression characters already removed
    :param max_fields: number of leading fields to keep
    :return: list of ints, one per field that is present
    :raises ValueError: if a kept field is not a valid integer
    """
    name = allele[allele.find('*') + 1:]
    return [int(field) for field in name.split(':')[:max_fields]]


@functools.lru_cache(maxsize=None)
def smart_sort_comparator(a1, a2):
    """
    Natural sort 2 given alleles.

    Python sorts strings lexicographically but HLA alleles need
    to be sorted by numerical values in each field of the HLA nomenclature.

    Characters matched by ``expr_regex`` are removed from both names first.
    Only the first four fields take part in the comparison; anything after
    the 4th field is considered equal. When one allele has fewer fields than
    the other and all shared fields are equal, the shorter allele sorts first.

    :param a1: first allele
    :param a2: second allele
    :return: -1 if a1 sorts before a2, 1 if a1 sorts after a2, 0 otherwise
    :raises ValueError: if a compared field is not numeric
    """
    # Check to see if they are the same alleles
    if a1 == a2:
        return 0

    # remove any non-numerics
    a1 = expr_regex.sub('', a1)
    a2 = expr_regex.sub('', a2)

    # Check to see if they are still the same alleles
    if a1 == a2:
        return 0

    a1_fields = _leading_field_numbers(a1)
    a2_fields = _leading_field_numbers(a2)

    # List comparison walks the fields left to right and treats a missing
    # trailing field as smaller, so differing field counts cannot raise.
    if a1_fields < a2_fields:
        return -1
    if a1_fields > a2_fields:
        return 1

    # All fields are considered equal after 4th field
    return 0

def _allele_sort_key(allele):
    """
    Build a numeric sort key from an allele name.

    Everything up to and including ``*`` is ignored; the remainder is split
    on ``:``. Trailing expression/group characters (``N Q L S G g``) are
    stripped from each field before it is converted with ``int``.

    :param allele: allele name, e.g. ``A*01:01:01G``
    :return: list of ints, one per field
    :raises ValueError: if a field is not numeric after stripping
    """
    name = allele[allele.find('*') + 1:]
    return [int(field.rstrip('NQLSGg')) for field in name.split(':')]


def smart_sort_alleles(a1, a2):
    """
    Natural sort 2 given alleles.

    Python sorts strings lexicographically but HLA alleles need
    to be sorted by numerical values in each field of the HLA nomenclature.

    Every field is compared numerically, so alleles with more than two
    fields are supported. If all shared fields are equal, the allele with
    fewer fields comes first; on a complete tie the input order is kept.

    :param a1: first allele
    :param a2: second allele
    :return: a two-element list holding a1 and a2 in sorted order
    :raises ValueError: if a field is not numeric
    """
    # Check to see if they are the same alleles
    if a1 == a2:
        return [a1, a2]

    # Swap only when a2 is strictly smaller so that ties keep input order.
    if _allele_sort_key(a2) < _allele_sort_key(a1):
        return [a2, a1]
    return [a1, a2]
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[bumpversion]
current_version = 0.0.13
current_version = 0.0.21
commit = True
tag = True

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name='py-ard',
version='0.0.18',
version='0.0.21',
description="ARD reduction for HLA with python",
long_description=readme + '\n\n' + history,
author="CIBMTR",
Expand Down
94 changes: 37 additions & 57 deletions tests/test_pyard.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

#
# pyars pyARS.
# py-ard pyARD.
# Copyright (c) 2018 Be The Match operated by National Marrow Donor Program. All Rights Reserved.
#
# This library is free software; you can redistribute it and/or modify it
Expand All @@ -24,75 +23,56 @@
#

"""
test_pyars
test_pyard
----------------------------------

Tests for `pyars` module.
Tests for `py-ard` module.
"""
import os
import sys
import json
import os
import unittest

from pyard import ARD


class TestPyArd(unittest.TestCase):
    """Tests for the ``redux`` and ``redux_gl`` methods of ``ARD``."""

    @classmethod
    def setUpClass(cls) -> None:
        # One ARD instance is built per test class and shared by the tests
        # that do not need special construction arguments.
        cls.db_version = '3290'
        cls.ard_data_dir = '/tmp/3290'
        cls.ard = ARD(cls.db_version, data_dir=cls.ard_data_dir)

    def test_no_mac(self):
        """An ARD built with download_mac=False has no MACs but still reduces."""
        ard_no_mac = ARD(self.db_version, data_dir=self.ard_data_dir,
                         download_mac=False)
        self.assertIsInstance(ard_no_mac, ARD)
        self.assertEqual(len(ard_no_mac.mac.keys()), 0)
        self.assertEqual(ard_no_mac.redux("A*01:01:01", 'G'), "A*01:01:01G")
        self.assertEqual(ard_no_mac.redux("A*01:01:01", 'lg'), "A*01:01g")
        self.assertEqual(ard_no_mac.redux("A*01:01:01", 'lgx'), "A*01:01")
        self.assertEqual(ard_no_mac.redux("HLA-A*01:01:01", 'G'), "HLA-A*01:01:01G")
        self.assertEqual(ard_no_mac.redux("HLA-A*01:01:01", 'lg'), "HLA-A*01:01g")
        self.assertEqual(ard_no_mac.redux("HLA-A*01:01:01", 'lgx'), "HLA-A*01:01")

    def test_remove_invalid(self):
        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")

    def test_mac(self):
        self.assertEqual(self.ard.redux_gl("A*01:AB", 'G'), "A*01:01:01G/A*01:02")
        self.assertEqual(self.ard.redux_gl("HLA-A*01:AB", 'G'), "HLA-A*01:01:01G/HLA-A*01:02")

    def test_redux_gl(self):
        """Check every redux_gl example listed in expected.json next to this file."""
        expected_json = os.path.join(os.path.dirname(__file__), "expected.json")
        with open(expected_json) as json_data:
            expected = json.load(json_data)
        for ex in expected['redux_gl']:
            glstring = ex['glstring']
            ard_type = ex['ard_type']
            expected_gl = ex['expected_gl']
            # subTest reports each failing example instead of stopping at the first.
            with self.subTest(glstring=glstring, ard_type=ard_type):
                self.assertEqual(self.ard.redux_gl(glstring, ard_type), expected_gl)

    def test_mac_G(self):
        self.assertEqual(self.ard.redux("A*01:01:01", 'G'), "A*01:01:01G")
Loading