diff --git a/extras/reduce_conf.json b/extras/reduce_conf.json index a219e94..6c4e737 100644 --- a/extras/reduce_conf.json +++ b/extras/reduce_conf.json @@ -30,6 +30,7 @@ "apply_compression": "gzip", "reduce_serology": false, "reduce_v2": true, + "convert_v2_to_v3": false, "reduce_3field": true, "reduce_P": true, "reduce_XX": false, diff --git a/pyard/__init__.py b/pyard/__init__.py index a419903..59b08c1 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -24,4 +24,4 @@ from .pyard import ARD __author__ = """NMDP Bioinformatics""" -__version__ = '0.6.9' +__version__ = '0.6.10' diff --git a/scripts/pyard-reduce-csv b/scripts/pyard-reduce-csv index 59e453c..40ab179 100755 --- a/scripts/pyard-reduce-csv +++ b/scripts/pyard-reduce-csv @@ -40,8 +40,7 @@ import pyard.drbx as drbx def is_serology(allele: str) -> bool: - if len(allele.split(':')) == 1: - return True + return ard.is_serology(allele) def is_3field(allele: str) -> bool: @@ -52,7 +51,11 @@ def is_P(allele: str) -> bool: if allele.endswith('P'): fields = allele.split(':') if len(fields) == 2: # Ps are 2 fields - return fields[0].isdigit() and fields[0].isdigit() + # Check both fields are digits only + # Eg: A*02:01P + # Check last 2 digits of first field: 02 is numeric + # Check digits of seconds field: 01 is numeric + return fields[0][-2:].isdigit() and fields[1][:-1].isdigit() return False @@ -83,6 +86,11 @@ def should_be_reduced(allele, locus_allele): return False +def remove_locus_name(reduced_allele): + return "/".join(map(lambda a: a.split('*')[1], + reduced_allele.split('/'))) + + def reduce(allele, locus, column_name): # Does the allele name have the locus in it ? if allele == '': @@ -111,16 +119,23 @@ def reduce(allele, locus, column_name): if ard_config["keep_locus_in_allele_name"]: allele = reduced_allele else: - allele = "/".join(map(lambda a: a.split('*')[1], - reduced_allele.split('/'))) + allele = remove_locus_name(reduced_allele) else: if verbose: print(f"Failed to reduce {locus_allele}") - if verbose: print(f"\t{locus_allele} => {allele}") else: - if ard_config["keep_locus_in_allele_name"]: + if ard_config['convert_v2_to_v3']: + if ard.is_v2(locus_allele): + v3_allele = ard.v2_to_v3(locus_allele) + if not ard_config["keep_locus_in_allele_name"]: + allele = remove_locus_name(v3_allele) + else: + allele = v3_allele + if verbose: + print(f"\t{locus_allele} => {allele}") + elif ard_config["keep_locus_in_allele_name"]: allele = locus_allele return allele @@ -170,7 +185,7 @@ if __name__ == '__main__': sys.exit(1) # Instantiate py-ard object with the latest - ard = pyard.ARD(remove_invalid=False) + ard = pyard.ARD() # Read the Input File # Read only the columns to be saved. diff --git a/setup.cfg b/setup.cfg index 9917d39..979da02 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.6.9 +current_version = 0.6.10 commit = True tag = True diff --git a/setup.py b/setup.py index f3cc342..bf86ec5 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ setup( name='py-ard', - version='0.6.9', + version='0.6.10', description="ARD reduction for HLA with Python", long_description=readme + '\n\n' + history, author="CIBMTR",