From e3471dc4b0583f1ffdfb7eefeda74a3efe9302aa Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 7 Dec 2021 14:10:58 -0600 Subject: [PATCH 1/3] Remove remove_invalid arg from ARD --- scripts/pyard-reduce-csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/pyard-reduce-csv b/scripts/pyard-reduce-csv index 59e453c..a0c6bc6 100755 --- a/scripts/pyard-reduce-csv +++ b/scripts/pyard-reduce-csv @@ -170,7 +170,7 @@ if __name__ == '__main__': sys.exit(1) # Instantiate py-ard object with the latest - ard = pyard.ARD(remove_invalid=False) + ard = pyard.ARD() # Read the Input File # Read only the columns to be saved. From 4ec4cc460842a929941f64577295323167e64622 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 7 Dec 2021 14:12:12 -0600 Subject: [PATCH 2/3] =?UTF-8?q?Bump=20version:=200.6.9=20=E2=86=92=200.6.1?= =?UTF-8?q?0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyard/__init__.py b/pyard/__init__.py index a419903..59b08c1 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -24,4 +24,4 @@ from .pyard import ARD __author__ = """NMDP Bioinformatics""" -__version__ = '0.6.9' +__version__ = '0.6.10' diff --git a/setup.cfg b/setup.cfg index 9917d39..979da02 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.9 +current_version = 0.6.10 commit = True tag = True diff --git a/setup.py b/setup.py index f3cc342..bf86ec5 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name='py-ard', - version='0.6.9', + version='0.6.10', description="ARD reduction for HLA with Python", long_description=readme + '\n\n' + history, author="CIBMTR", From e329e5e39ed21147e8fd1e9f07ceb593363b445c Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Fri, 10 Dec 2021 15:30:06 -0600 Subject: [PATCH 3/3] Batch Reduce fixes: - Fix serology check - Fix issue with P groups - A new flag `convert_v2_to_v3` to convert v2 to v3 but not reduce. --- extras/reduce_conf.json | 1 + scripts/pyard-reduce-csv | 29 ++++++++++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/extras/reduce_conf.json b/extras/reduce_conf.json index a219e94..6c4e737 100644 --- a/extras/reduce_conf.json +++ b/extras/reduce_conf.json @@ -30,6 +30,7 @@ "apply_compression": "gzip", "reduce_serology": false, "reduce_v2": true, + "convert_v2_to_v3": false, "reduce_3field": true, "reduce_P": true, "reduce_XX": false, diff --git a/scripts/pyard-reduce-csv b/scripts/pyard-reduce-csv index a0c6bc6..40ab179 100755 --- a/scripts/pyard-reduce-csv +++ b/scripts/pyard-reduce-csv @@ -40,8 +40,7 @@ import pyard.drbx as drbx def is_serology(allele: str) -> bool: - if len(allele.split(':')) == 1: - return True + return ard.is_serology(allele) def is_3field(allele: str) -> bool: @@ -52,7 +51,11 @@ def is_P(allele: str) -> bool: if allele.endswith('P'): fields = allele.split(':') if len(fields) == 2: # Ps are 2 fields - return fields[0].isdigit() and fields[0].isdigit() + # Check both fields are digits only + # Eg: A*02:01P + # Check last 2 digits of first field: 02 is numeric + # Check digits of seconds field: 01 is numeric + return fields[0][-2:].isdigit() and fields[1][:-1].isdigit() return False @@ -83,6 +86,11 @@ def should_be_reduced(allele, locus_allele): return False +def remove_locus_name(reduced_allele): + return "/".join(map(lambda a: a.split('*')[1], + reduced_allele.split('/'))) + + def reduce(allele, locus, column_name): # Does the allele name have the locus in it ? if allele == '': @@ -111,16 +119,23 @@ def reduce(allele, locus, column_name): if ard_config["keep_locus_in_allele_name"]: allele = reduced_allele else: - allele = "/".join(map(lambda a: a.split('*')[1], - reduced_allele.split('/'))) + allele = remove_locus_name(reduced_allele) else: if verbose: print(f"Failed to reduce {locus_allele}") - if verbose: print(f"\t{locus_allele} => {allele}") else: - if ard_config["keep_locus_in_allele_name"]: + if ard_config['convert_v2_to_v3']: + if ard.is_v2(locus_allele): + v3_allele = ard.v2_to_v3(locus_allele) + if not ard_config["keep_locus_in_allele_name"]: + allele = remove_locus_name(v3_allele) + else: + allele = v3_allele + if verbose: + print(f"\t{locus_allele} => {allele}") + elif ard_config["keep_locus_in_allele_name"]: allele = locus_allele return allele