From de49ecf610cd5bde596a338652cb8beaac1858d8 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 6 Oct 2025 14:00:19 -0500 Subject: [PATCH 01/24] create Specialized Handler Classes for allele, glstring, mac etc. --- pyard/ard_refactored.py | 463 ++++++++++++++++++++++++++ pyard/handlers/__init__.py | 19 ++ pyard/handlers/allele_reducer.py | 147 ++++++++ pyard/handlers/gl_string_processor.py | 108 ++++++ pyard/handlers/mac_handler.py | 103 ++++++ pyard/handlers/serology_handler.py | 41 +++ pyard/handlers/shortnull_handler.py | 21 ++ pyard/handlers/v2_handler.py | 77 +++++ pyard/handlers/xx_handler.py | 23 ++ 9 files changed, 1002 insertions(+) create mode 100644 pyard/ard_refactored.py create mode 100644 pyard/handlers/__init__.py create mode 100644 pyard/handlers/allele_reducer.py create mode 100644 pyard/handlers/gl_string_processor.py create mode 100644 pyard/handlers/mac_handler.py create mode 100644 pyard/handlers/serology_handler.py create mode 100644 pyard/handlers/shortnull_handler.py create mode 100644 pyard/handlers/v2_handler.py create mode 100644 pyard/handlers/xx_handler.py diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py new file mode 100644 index 0000000..4b625c2 --- /dev/null +++ b/pyard/ard_refactored.py @@ -0,0 +1,463 @@ +# -*- coding: utf-8 -*- + +import functools +import sys +from typing import Union, List + +from . import data_repository as dr +from . import db +from . 
import smart_sort +from .constants import ( + HLA_regex, + VALID_REDUCTION_TYPES, + DEFAULT_CACHE_SIZE, + G_GROUP_LOCI, +) +from .exceptions import InvalidMACError, InvalidTypingError +from .handlers import ( + AlleleReducer, + GLStringProcessor, + MACHandler, + SerologyHandler, + V2Handler, + XXHandler, + ShortNullHandler, +) +from .misc import get_2field_allele, is_2_field_allele +from .serology import SerologyMapping + +default_config = { + "reduce_serology": True, + "reduce_v2": True, + "reduce_3field": True, + "reduce_P": True, + "reduce_XX": True, + "reduce_MAC": True, + "reduce_shortnull": True, + "ping": True, + "verbose_log": False, + "ARS_as_lg": False, + "strict": True, + "ignore_allele_with_suffixes": (), +} + + +class ARD(object): + """ + ARD reduction for HLA - Refactored with specialized handlers + """ + + def __init__( + self, + imgt_version: str = "Latest", + data_dir: str = None, + load_mac: bool = True, + max_cache_size: int = DEFAULT_CACHE_SIZE, + config: dict = None, + ): + self._data_dir = data_dir + self._config = default_config.copy() + if config: + self._config.update(config) + + # Initialize database and mappings + self._initialize_database(imgt_version, load_mac) + + # Initialize specialized handlers + self._initialize_handlers() + + # Setup caching + self._setup_caching(max_cache_size) + + # Freeze reference data for Python >= 3.9 + self._freeze_reference_data() + + # Reopen connection in read-only mode + self.db_connection, _ = db.create_db_connection(data_dir, imgt_version, ro=True) + + def _initialize_database(self, imgt_version: str, load_mac: bool): + """Initialize database connection and load all mappings""" + self.db_connection, _ = db.create_db_connection(self._data_dir, imgt_version) + + # Load ARD mappings + self.ars_mappings = dr.generate_ard_mapping(self.db_connection, imgt_version) + + # Load Alleles and XX Codes + ( + self.code_mappings, + self.allele_group, + ) = dr.generate_alleles_and_xx_codes_and_who( + 
self.db_connection, imgt_version, self.ars_mappings + ) + + # Generate short nulls + self.shortnulls = dr.generate_short_nulls( + self.db_connection, self.code_mappings.who_group + ) + + # Load Serology mappings + broad_splits_mapping, associated_mapping = dr.generate_broad_splits_mapping( + self.db_connection, imgt_version + ) + self.serology_mapping = SerologyMapping( + broad_splits_mapping, associated_mapping + ) + dr.generate_serology_mapping( + self.db_connection, imgt_version, self.serology_mapping, self._redux_allele + ) + self.valid_serology_set = SerologyMapping.get_valid_serology_names() + + # Load other mappings + dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version) + dr.set_db_version(self.db_connection, imgt_version) + dr.generate_mac_codes(self.db_connection, refresh_mac=False, load_mac=load_mac) + dr.generate_cwd_mapping(self.db_connection) + + self.db_connection.close() + + def _initialize_handlers(self): + """Initialize all specialized handlers""" + self.allele_reducer = AlleleReducer(self) + self.gl_processor = GLStringProcessor(self) + self.mac_handler = MACHandler(self) + self.serology_handler = SerologyHandler(self) + self.v2_handler = V2Handler(self) + self.xx_handler = XXHandler(self) + self.shortnull_handler = ShortNullHandler(self) + + def _setup_caching(self, max_cache_size: int): + """Setup caching for performance""" + if max_cache_size != DEFAULT_CACHE_SIZE: + self._redux_allele = functools.lru_cache(maxsize=max_cache_size)( + self._redux_allele + ) + self.redux = functools.lru_cache(maxsize=max_cache_size)(self.redux) + self.is_mac = functools.lru_cache(maxsize=max_cache_size)( + self.mac_handler.is_mac + ) + self.smart_sort_comparator = functools.lru_cache(maxsize=max_cache_size)( + smart_sort.smart_sort_comparator + ) + else: + self.smart_sort_comparator = smart_sort.smart_sort_comparator + + @staticmethod + def _freeze_reference_data(): + """Freeze reference data for Python >= 3.9""" + if sys.version_info.major == 3 and 
sys.version_info.minor >= 9: + import gc + + gc.freeze() + + def __del__(self): + """Close database connection when ARD instance is destroyed""" + if hasattr(self, "db_connection") and self.db_connection: + self.db_connection.close() + + @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) + def _redux_allele( + self, allele: str, redux_type: VALID_REDUCTION_TYPES, re_ping=True + ) -> str: + """Core allele reduction with ping logic""" + # Handle HLA- prefix + if HLA_regex.search(allele): + hla, allele_name = allele.split("-") + redux_allele = self._redux_allele(allele_name, redux_type) + if redux_allele: + if "/" in redux_allele: + return "/".join([f"HLA-{ra}" for ra in redux_allele.split("/")]) + return f"HLA-{redux_allele}" + return redux_allele + + if not self._config["strict"]: + allele = self._get_non_strict_allele(allele) + + # Handle P/G suffixes + if allele.endswith(("P", "G")) and redux_type in ["lg", "lgx", "G"]: + allele = allele[:-1] + + # Handle ping mode + if self._config["ping"] and re_ping and redux_type in ("lg", "lgx", "U2"): + if allele in self.ars_mappings.p_not_g: + not_g_allele = self.ars_mappings.p_not_g[allele] + if redux_type == "lg": + return self.allele_reducer._add_lg_suffix(not_g_allele) + return not_g_allele + else: + redux_allele = self._redux_allele(allele, redux_type, False) + if redux_allele.endswith("g"): + no_suffix_allele = redux_allele[:-1] + elif redux_allele.endswith("ARS"): + no_suffix_allele = redux_allele[:-3] + else: + no_suffix_allele = redux_allele + + if ( + no_suffix_allele == allele + or "/" in no_suffix_allele + or no_suffix_allele in self.ars_mappings.p_not_g.values() + ): + return redux_allele + + twice_redux_allele = self._redux_allele( + no_suffix_allele, redux_type, False + ) + if "/" in twice_redux_allele: + return twice_redux_allele + if self._is_valid_allele(twice_redux_allele): + return twice_redux_allele + + return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) + + 
@functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) + def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str: + """Main redux method using specialized handlers""" + # Handle GL string delimiters first + processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) + if processed_gl != glstring or self.is_glstring(processed_gl): + return processed_gl + + # Handle ignored allele suffixes + if self._config["ignore_allele_with_suffixes"]: + _, fields = glstring.split("*") + if fields in self._config["ignore_allele_with_suffixes"]: + return glstring + + # Handle V2 to V3 mapping + if self.v2_handler.is_v2(glstring): + glstring = self.v2_handler.map_v2_to_v3(glstring) + return self.redux(glstring, redux_type) + + # Handle Serology + if self._config["reduce_serology"] and self.serology_handler.is_serology( + glstring + ): + alleles = self.serology_handler.get_alleles_from_serology(glstring) + if alleles: + return self.redux("/".join(alleles), redux_type) + return "" + + is_hla_prefix = HLA_regex.search(glstring) + if is_hla_prefix: + allele = glstring.split("-")[1] + else: + allele = glstring + # Validate format + if ":" in allele: + loc_allele = allele.split(":") + if len(loc_allele) < 2: + raise InvalidTypingError( + f"{glstring} is not a valid V2 or Serology typing." + ) + loc_antigen, code = loc_allele[0], loc_allele[1] + # Check for empty fields (like DQA1*01:01:01:G where G is after empty field) + if any(field == "" for field in loc_allele[1:]): + raise InvalidTypingError( + f"{glstring} is not a valid V2 or Serology typing." + ) + else: + if "*" in allele: + locus, _ = allele.split("*") + if locus not in G_GROUP_LOCI: + return glstring + raise InvalidTypingError( + f"{glstring} is not a valid V2 or Serology typing." 
+ ) + + # Handle XX codes + if ( + self._config["reduce_XX"] + and code == "XX" + and self.xx_handler.is_xx(allele, loc_antigen, code) + ): + reduced_alleles = self.redux( + "/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type + ) + if is_hla_prefix: + return "/".join([f"HLA-{a}" for a in reduced_alleles.split("/")]) + return reduced_alleles + + # Handle MAC + if self._config["reduce_MAC"] and code.isalpha(): + if self.mac_handler.is_mac(allele): + alleles = self.mac_handler._get_alleles(code, loc_antigen) + if is_hla_prefix: + alleles = [f"HLA-{a}" for a in alleles] + return self.redux("/".join(alleles), redux_type) + else: + raise InvalidMACError(f"{glstring} is an invalid MAC.") + + # Handle short nulls + if self._config["reduce_shortnull"] and self.shortnull_handler.is_shortnull( + glstring + ): + return self.redux("/".join(self.shortnulls[glstring]), redux_type) + + return self._redux_allele(glstring, redux_type) + + @staticmethod + def is_glstring(gl_string: str) -> bool: + return ( + "/" in gl_string or "+" in gl_string or "^" in gl_string or "~" in gl_string + ) + + # Delegate methods to handlers + def is_mac(self, allele: str) -> bool: + return self.mac_handler.is_mac(allele) + + def is_serology(self, allele: str) -> bool: + return self.serology_handler.is_serology(allele) + + def is_v2(self, allele: str) -> bool: + return self.v2_handler.is_v2(allele) + + def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: + return self.xx_handler.is_xx(glstring, loc_antigen, code) + + def is_shortnull(self, allele: str) -> bool: + return self.shortnull_handler.is_shortnull(allele) + + def is_null(self, allele: str) -> bool: + return self.shortnull_handler.is_null(allele) + + def expand_mac(self, mac_code: str) -> str: + return self.mac_handler.expand_mac(mac_code) + + def lookup_mac(self, allelelist_gl: str) -> str: + return self.mac_handler.lookup_mac(allelelist_gl) + + def find_broad_splits(self, allele: str) -> tuple: + return 
self.serology_handler.find_broad_splits(allele) + + def find_associated_antigen(self, serology: str) -> str: + return self.serology_handler.find_associated_antigen(serology) + + def find_xx_from_serology(self, serology: str) -> str: + return self.serology_handler.find_xx_from_serology(serology) + + def v2_to_v3(self, v2_allele: str) -> str: + return self.v2_handler.map_v2_to_v3(v2_allele) + + # Keep existing methods that don't fit into handlers + def validate(self, glstring: str) -> bool: + return self.gl_processor.validate_gl_string(glstring) + + def _get_non_strict_allele(self, allele: str) -> str: + """Handle non-strict allele validation""" + from .constants import expression_chars + + if not self._is_allele_in_db(allele): + for expr_char in expression_chars: + if self._is_allele_in_db(allele + expr_char): + if self._config["verbose_log"]: + print(f"{allele} is not valid. Using {allele}{expr_char}") + allele = allele + expr_char + break + return allele + + def _is_who_allele(self, allele: str) -> bool: + return allele in self.allele_group.who_alleles + + def _is_allele_in_db(self, allele: str) -> bool: + return allele in self.allele_group.alleles + + def _is_valid_allele(self, allele: str) -> bool: + if allele.endswith(("P", "G")): + allele = allele[:-1] + if "*" in allele: + _, fields = allele.split("*") + if not all(map(str.isalnum, fields.split(":"))): + return False + if self._is_allele_in_db(allele): + return True + else: + allele = get_2field_allele(allele) + return self._is_allele_in_db(allele) + + def _is_valid(self, allele: str) -> bool: + """Validate allele in various forms""" + if allele == "" or allele.endswith("*"): + return False + + if HLA_regex.search(allele): + allele = allele[4:] + + if "*" in allele: + alphanum_allele = allele.replace("*", "").replace(":", "") + if not alphanum_allele.isalnum(): + return False + + if self._config["ignore_allele_with_suffixes"]: + locus, fields = allele.split("*") + if fields in 
self._config["ignore_allele_with_suffixes"]: + return True + + if not self._config["strict"]: + allele = self._get_non_strict_allele(allele) + + if ( + not self.is_mac(allele) + and not self.is_XX(allele) + and not self.is_serology(allele) + and not self.is_v2(allele) + and not self.is_shortnull(allele) + ): + return self._is_valid_allele(allele) + + return True + + # Keep remaining methods unchanged + def is_exp_allele(self, allele: str) -> bool: + return allele in self.allele_group.exp_alleles + + def cwd_redux(self, allele_list_gl: str) -> str: + """CWD reduction using existing logic""" + alleles = [] + for allele in allele_list_gl.split("/"): + if self.is_mac(allele): + alleles.extend(self.expand_mac(allele).split("/")) + elif is_2_field_allele(allele) and not self.is_XX(allele): + alleles.append(allele) + else: + alleles.extend(self.redux(allele, "lgx").split("/")) + + locus = allele_list_gl.split("*")[0] + if HLA_regex.search(locus): + locus = locus.split("-")[1] + ciwd_for_locus = db.load_cwd(self.db_connection, locus) + + alleles_in_ciwd = ciwd_for_locus.intersection(alleles) + return "/".join(sorted(alleles_in_ciwd)) + + def refresh_mac_codes(self) -> None: + dr.generate_mac_codes(self.db_connection, refresh_mac=True) + + def get_db_version(self) -> str: + return dr.get_db_version(self.db_connection) + + def similar_alleles(self, prefix: str) -> Union[List, None]: + """Find similar alleles using existing logic""" + if "*" not in prefix: + return None + + locus, fields = prefix.split("*") + if fields: + if len(fields.split(":")) == 2: + first_field, mac_prefix = fields.split(":") + if mac_prefix.isalpha(): + similar_mac_names = db.similar_mac(self.db_connection, mac_prefix) + if similar_mac_names: + locus_prefix = f"{locus}*{first_field}" + mac_codes = [ + f"{locus_prefix}:{code}" for code in similar_mac_names + ] + return sorted(filter(lambda mac: self.is_mac(mac), mac_codes)) + + similar_allele_names = db.similar_alleles(self.db_connection, prefix) + if 
similar_allele_names: + return sorted( + similar_allele_names, + key=functools.cmp_to_key(smart_sort.smart_sort_comparator), + ) + + return None diff --git a/pyard/handlers/__init__.py b/pyard/handlers/__init__.py new file mode 100644 index 0000000..d9ab5f5 --- /dev/null +++ b/pyard/handlers/__init__.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +from .allele_reducer import AlleleReducer +from .gl_string_processor import GLStringProcessor +from .mac_handler import MACHandler +from .serology_handler import SerologyHandler +from .v2_handler import V2Handler +from .xx_handler import XXHandler +from .shortnull_handler import ShortNullHandler + +__all__ = [ + "AlleleReducer", + "GLStringProcessor", + "MACHandler", + "SerologyHandler", + "V2Handler", + "XXHandler", + "ShortNullHandler", +] diff --git a/pyard/handlers/allele_reducer.py b/pyard/handlers/allele_reducer.py new file mode 100644 index 0000000..b070304 --- /dev/null +++ b/pyard/handlers/allele_reducer.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- + +import functools +from typing import TYPE_CHECKING + +from ..constants import VALID_REDUCTION_TYPES, expression_chars +from ..exceptions import InvalidAlleleError +from ..misc import get_n_field_allele + +if TYPE_CHECKING: + from ..ard import ARD + + +class AlleleReducer: + """Handles core allele reduction logic""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def reduce_allele( + self, allele: str, redux_type: VALID_REDUCTION_TYPES, re_ping=True + ) -> str: + """Core allele reduction logic extracted from _redux_allele""" + + if redux_type == "G" and allele in self.ard.ars_mappings.g_group: + if allele in self.ard.ars_mappings.dup_g: + return self.ard.ars_mappings.dup_g[allele] + else: + return self.ard.ars_mappings.g_group[allele] + + elif redux_type == "P" and allele in self.ard.ars_mappings.p_group: + return self.ard.ars_mappings.p_group[allele] + + elif redux_type in ["lgx", "lg"]: + if allele in self.ard.ars_mappings.lgx_group: + 
redux_allele = self.ard.ars_mappings.lgx_group[allele] + else: + redux_allele = ":".join(allele.split(":")[0:2]) + if redux_type == "lg": + return self._add_lg_suffix(redux_allele) + return redux_allele + + elif redux_type == "W": + if self.ard._is_who_allele(allele): + return allele + if allele in self.ard.code_mappings.who_group: + return self.ard.redux( + "/".join(self.ard.code_mappings.who_group[allele]), redux_type + ) + else: + return allele + + elif redux_type == "exon": + return self._handle_exon_reduction(allele) + + elif redux_type == "U2": + return self._handle_u2_reduction(allele) + + elif redux_type == "S": + return self._handle_serology_reduction(allele) + + else: + return self._handle_default_reduction(allele) + + def _add_lg_suffix(self, redux_allele): + """Add lg suffix to reduced allele""" + if "/" in redux_allele: + return "/".join( + [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] + ) + if self.ard._config["ARS_as_lg"]: + return redux_allele + "ARS" + return redux_allele + "g" + + def _handle_exon_reduction(self, allele): + """Handle exon reduction type""" + if allele in self.ard.ars_mappings.exon_group: + exon_group_allele = self.ard.ars_mappings.exon_group[allele] + last_char = allele[-1] + if last_char in expression_chars: + exon_short_null_allele = exon_group_allele + last_char + if self.ard.is_shortnull(exon_short_null_allele): + return exon_short_null_allele + return exon_group_allele + else: + w_redux = self.ard.redux(allele, "W") + if w_redux == allele or len(w_redux.split(":")) == 2: + return allele + else: + return self.ard.redux(w_redux, "exon") + + def _handle_u2_reduction(self, allele): + """Handle U2 reduction type""" + allele_fields = allele.split(":") + if len(allele_fields) == 2: + return allele + allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) + if self.ard._is_allele_in_db(allele_2_fields): + return allele_2_fields + else: + return self.reduce_allele(allele, "lgx") + + def 
_handle_serology_reduction(self, allele): + """Handle serology reduction type""" + from .. import db + from ..misc import is_2_field_allele + + if is_2_field_allele(allele): + allele = self.reduce_allele(allele, "lgx") + serology_mapping = db.find_serology_for_allele( + self.ard.db_connection, allele, "lgx_allele_list" + ) + else: + serology_mapping = db.find_serology_for_allele( + self.ard.db_connection, allele + ) + + serology_set = set() + for serology, allele_list in serology_mapping.items(): + if allele in allele_list.split("/"): + serology_set.add(serology) + + if not serology_set and is_2_field_allele(allele): + for serology, allele_list in serology_mapping.items(): + allele_list_lgx = self.ard.redux(allele_list, "lgx") + if allele in allele_list_lgx.split("/"): + serology_set.add(serology) + + return "/".join( + sorted( + serology_set, key=functools.cmp_to_key(self.ard.smart_sort_comparator) + ) + ) + + def _handle_default_reduction(self, allele): + """Handle default reduction cases""" + if allele.endswith("P"): + if allele in self.ard.ars_mappings.p_group.values(): + return allele + elif allele.endswith("G"): + if allele in self.ard.ars_mappings.g_group.values(): + return allele + + if self.ard._is_allele_in_db(allele): + return allele + else: + raise InvalidAlleleError(f"{allele} is an invalid allele.") diff --git a/pyard/handlers/gl_string_processor.py b/pyard/handlers/gl_string_processor.py new file mode 100644 index 0000000..add5521 --- /dev/null +++ b/pyard/handlers/gl_string_processor.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- + +import functools +from typing import List, TYPE_CHECKING + +from ..constants import VALID_REDUCTION_TYPES +from ..misc import validate_reduction_type + +if TYPE_CHECKING: + from ..ard import ARD + + +class GLStringProcessor: + """Handles GL string parsing, validation and processing""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def process_gl_string( + self, glstring: str, redux_type: 
VALID_REDUCTION_TYPES = "lgx" + ) -> str: + """Main GL string processing logic extracted from redux method""" + validate_reduction_type(redux_type) + + if self.ard._config["strict"]: + self.validate_gl_string(glstring) + + # Handle GL string delimiters + if "^" in glstring: + return self._sorted_unique_gl( + [self.ard.redux(a, redux_type) for a in glstring.split("^")], "^" + ) + + if "|" in glstring: + return self._sorted_unique_gl( + [self.ard.redux(a, redux_type) for a in glstring.split("|")], "|" + ) + + if "+" in glstring: + return self._sorted_unique_gl( + [self.ard.redux(a, redux_type) for a in glstring.split("+")], "+" + ) + + if "~" in glstring: + return self._sorted_unique_gl( + [self.ard.redux(a, redux_type) for a in glstring.split("~")], "~" + ) + + if "/" in glstring: + return self._sorted_unique_gl( + [self.ard.redux(a, redux_type) for a in glstring.split("/")], "/" + ) + + return glstring + + def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: + """Make a list of sorted unique GL Strings separated by delim""" + if delim == "~": + return delim.join(gls) + + if delim == "+": + non_empty_gls = filter(lambda s: s != "", gls) + return delim.join( + sorted( + non_empty_gls, + key=functools.cmp_to_key( + lambda a, b: self.ard.smart_sort_comparator( + a, b, self.ard._config["ignore_allele_with_suffixes"] + ) + ), + ) + ) + + all_gls = [] + for gl in gls: + all_gls += gl.split(delim) + unique_gls = filter(lambda s: s != "", set(all_gls)) + return delim.join( + sorted( + unique_gls, + key=functools.cmp_to_key( + lambda a, b: self.ard.smart_sort_comparator( + a, b, self.ard._config["ignore_allele_with_suffixes"] + ) + ), + ) + ) + + def validate_gl_string(self, glstring: str) -> bool: + """Validate GL string structure and components""" + if "^" in glstring: + return all(map(self.validate_gl_string, glstring.split("^"))) + if "|" in glstring: + return all(map(self.validate_gl_string, glstring.split("|"))) + if "+" in glstring: + return 
all(map(self.validate_gl_string, glstring.split("+"))) + if "~" in glstring: + return all(map(self.validate_gl_string, glstring.split("~"))) + if "/" in glstring: + return all(map(self.validate_gl_string, glstring.split("/"))) + + # what falls through here is an allele + is_valid_allele = self.ard._is_valid(glstring) + if not is_valid_allele: + from ..exceptions import InvalidAlleleError + + raise InvalidAlleleError(f"{glstring} is not a valid Allele") + return is_valid_allele diff --git a/pyard/handlers/mac_handler.py b/pyard/handlers/mac_handler.py new file mode 100644 index 0000000..b0d5a6b --- /dev/null +++ b/pyard/handlers/mac_handler.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +import functools +import sqlite3 +from collections import Counter +from typing import Iterable, TYPE_CHECKING + +from ..constants import HLA_regex, DEFAULT_CACHE_SIZE +from ..exceptions import InvalidMACError +from .. import db + +if TYPE_CHECKING: + from ..ard import ARD + + +class MACHandler: + """Handles MAC (Multiple Allele Code) operations""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) + def is_mac(self, allele: str) -> bool: + """Check if allele is a valid MAC code""" + if ":" in allele: + allele_split = allele.split(":") + if len(allele_split) == 2: + locus_antigen, code = allele_split + if code.isalpha(): + try: + alleles = db.mac_code_to_alleles(self.ard.db_connection, code) + if alleles: + if any(map(lambda a: ":" in a, alleles)): + antigen_groups = map(lambda a: a.split(":")[0], alleles) + antigen_counts = Counter(antigen_groups) + valid_antigen = antigen_counts.most_common(1).pop()[0] + provided_antigen = locus_antigen.split("*").pop() + return provided_antigen == valid_antigen + return True + except sqlite3.OperationalError as e: + print("Error: ", e) + return False + + def expand_mac(self, mac_code: str) -> str: + """Expand MAC code into GL string of alleles""" + if self.is_mac(mac_code): 
+ locus_antigen, code = mac_code.split(":") + if HLA_regex.search(mac_code): + locus_antigen = locus_antigen.split("-")[1] + return "/".join( + ["HLA-" + a for a in self._get_alleles(code, locus_antigen)] + ) + else: + return "/".join(self._get_alleles(code, locus_antigen)) + raise InvalidMACError(f"{mac_code} is an invalid MAC.") + + def lookup_mac(self, allelelist_gl: str) -> str: + """Find MAC code corresponding to allele list""" + alleles = allelelist_gl.split("/") + allele_fields = [allele.split("*")[1] for allele in alleles] + antigen_groups = sorted({allele.split(":")[0] for allele in allele_fields}) + + if len(antigen_groups) == 1: + mac_expansion = "/".join( + sorted({allele.split(":")[1] for allele in allele_fields}) + ) + mac_code = db.alleles_to_mac_code(self.ard.db_connection, mac_expansion) + if mac_code: + locus = allelelist_gl.split("*")[0] + return f"{locus}*{antigen_groups[0]}:{mac_code}" + + # Try given list order + mac_expansion = "/".join(allele_fields) + mac_code = db.alleles_to_mac_code(self.ard.db_connection, mac_expansion) + if mac_code: + locus = allelelist_gl.split("*")[0] + return f"{locus}*{antigen_groups[0]}:{mac_code}" + + # Try sorted list + mac_expansion = "/".join( + sorted( + allele_fields, key=functools.cmp_to_key(self.ard.smart_sort_comparator) + ) + ) + mac_code = db.alleles_to_mac_code(self.ard.db_connection, mac_expansion) + if mac_code: + locus = allelelist_gl.split("*")[0] + return f"{locus}*{antigen_groups[0]}:{mac_code}" + + raise InvalidMACError(f"{allelelist_gl} does not have a MAC.") + + def _get_alleles(self, code, locus_antigen) -> Iterable[str]: + """Get alleles for MAC code""" + alleles = db.mac_code_to_alleles(self.ard.db_connection, code) + + is_allelic_expansion = any([":" in allele for allele in alleles]) + if is_allelic_expansion: + locus = locus_antigen.split("*")[0] + alleles = [f"{locus}*{a}" for a in alleles] + else: + alleles = [f"{locus_antigen}:{a}" for a in alleles] + + return 
list(filter(self.ard._is_allele_in_db, alleles)) diff --git a/pyard/handlers/serology_handler.py b/pyard/handlers/serology_handler.py new file mode 100644 index 0000000..a49012a --- /dev/null +++ b/pyard/handlers/serology_handler.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +from typing import Iterable, TYPE_CHECKING +from .. import db + +if TYPE_CHECKING: + from ..ard import ARD + + +class SerologyHandler: + """Handles serology-related operations""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def is_serology(self, allele: str) -> bool: + """Check if allele is valid serology""" + if "*" in allele or ":" in allele: + return False + return allele in self.ard.valid_serology_set + + def get_alleles_from_serology(self, serology: str) -> Iterable[str]: + """Get alleles corresponding to serology""" + alleles = db.serology_to_alleles(self.ard.db_connection, serology) + return set(filter(self.ard._is_allele_in_db, alleles)) + + def find_broad_splits(self, allele: str) -> tuple: + """Find broad/splits for serology""" + return self.ard.serology_mapping.find_splits(allele) + + def find_associated_antigen(self, serology: str) -> str: + """Find associated antigen for serology""" + return self.ard.serology_mapping.find_associated_antigen(serology) + + def find_xx_from_serology(self, serology: str) -> str: + """Find XX code from serology""" + if self.is_serology(serology): + return db.find_xx_for_serology(self.ard.db_connection, serology) + from ..exceptions import InvalidAlleleError + + raise InvalidAlleleError(f"{serology} is not a valid serology") diff --git a/pyard/handlers/shortnull_handler.py b/pyard/handlers/shortnull_handler.py new file mode 100644 index 0000000..fbba434 --- /dev/null +++ b/pyard/handlers/shortnull_handler.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..ard import ARD + + +class ShortNullHandler: + """Handles short null allele operations""" + + def 
__init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def is_shortnull(self, allele: str) -> bool: + """Check if allele is a valid short null""" + return allele in self.ard.shortnulls and self.ard._config["reduce_shortnull"] + + def is_null(self, allele: str) -> bool: + """Check if allele is a null allele""" + return allele.endswith("N") and not self.ard.is_mac(allele) diff --git a/pyard/handlers/v2_handler.py b/pyard/handlers/v2_handler.py new file mode 100644 index 0000000..42168ba --- /dev/null +++ b/pyard/handlers/v2_handler.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 -*- + +import re +from typing import TYPE_CHECKING +from .. import db + +if TYPE_CHECKING: + from ..ard import ARD + + +class V2Handler: + """Handles V2 to V3 nomenclature conversion""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def is_v2(self, allele: str) -> bool: + """Check if allele is V2 nomenclature""" + matches_v2_format = ( + self.ard._config["reduce_v2"] + and "*" in allele + and ":" not in allele + and allele.split("*")[0] not in ["MICA", "MICB", "HFE"] + ) + + if matches_v2_format: + v3_format_allele = self.map_v2_to_v3(allele) + if v3_format_allele != allele: + if v3_format_allele.split(":").pop().isalpha(): + return self.ard.is_mac(v3_format_allele) + return self.ard._is_allele_in_db(v3_format_allele) + + return False + + def map_v2_to_v3(self, v2_allele: str) -> str: + """Convert V2 allele to V3 format""" + v3_allele = db.v2_to_v3_allele(self.ard.db_connection, v2_allele) + if not v3_allele: + v3_allele = self._predict_v3(v2_allele) + return v3_allele + + def _predict_v3(self, v2_allele: str) -> str: + """Use heuristic to predict V3 from V2""" + locus, allele_name = v2_allele.split("*") + components = re.findall(r"^(\d+)(.*)", allele_name) + if not components: + return v2_allele + + digits_field, non_digits_field = components.pop() + final_allele = digits_field + num_of_digits = len(digits_field) + + if num_of_digits == 1: + return v2_allele 
+ + if num_of_digits > 2: + if locus.startswith("DP") and num_of_digits == 5: + final_allele = ( + digits_field[:3] + ":" + (digits_field[3:]) + non_digits_field + ) + elif num_of_digits % 2 == 0: + final_allele = self._combine_with_colon(digits_field) + non_digits_field + else: + final_allele = ( + digits_field[:2] + ":" + (digits_field[2:]) + non_digits_field + ) + else: + if non_digits_field: + final_allele = digits_field + ":" + non_digits_field + + return locus + "*" + final_allele + + @staticmethod + def _combine_with_colon(digits_field: str) -> str: + """Combine digits with colon separator""" + num_of_digits = len(digits_field) + return ":".join(digits_field[i : i + 2] for i in range(0, num_of_digits, 2)) diff --git a/pyard/handlers/xx_handler.py b/pyard/handlers/xx_handler.py new file mode 100644 index 0000000..8cf4c61 --- /dev/null +++ b/pyard/handlers/xx_handler.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..ard import ARD + + +class XXHandler: + """Handles XX code operations""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + def is_xx(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: + """Check if string is a valid XX code""" + if loc_antigen is None or code is None: + if ":" in glstring: + loc_allele = glstring.split(":") + loc_antigen, code = loc_allele[0], loc_allele[1] + else: + return False + return code == "XX" and loc_antigen in self.ard.code_mappings.xx_codes From 21ccae1cc0ca8d78f3dcaf93f3147e7fc0d885f9 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 6 Oct 2025 15:30:06 -0500 Subject: [PATCH 02/24] Strategies - Refactored so that each reduction mode is a strategy. 
--- pyard/__init__.py | 2 +- pyard/ard_refactored.py | 6 +- pyard/constants.py | 5 +- pyard/handlers/allele_reducer.py | 131 ++------------------------ pyard/handlers/gl_string_processor.py | 4 +- pyard/misc.py | 6 +- pyard/strategies/__init__.py | 26 +++++ pyard/strategies/base_strategy.py | 19 ++++ pyard/strategies/default_strategy.py | 26 +++++ pyard/strategies/exon_strategy.py | 32 +++++++ pyard/strategies/g_strategy.py | 18 ++++ pyard/strategies/lg_strategy.py | 36 +++++++ pyard/strategies/p_strategy.py | 15 +++ pyard/strategies/s_strategy.py | 43 +++++++++ pyard/strategies/strategy_factory.py | 44 +++++++++ pyard/strategies/u2_strategy.py | 22 +++++ pyard/strategies/w_strategy.py | 17 ++++ scripts/pyard | 9 +- 18 files changed, 323 insertions(+), 138 deletions(-) create mode 100644 pyard/strategies/__init__.py create mode 100644 pyard/strategies/base_strategy.py create mode 100644 pyard/strategies/default_strategy.py create mode 100644 pyard/strategies/exon_strategy.py create mode 100644 pyard/strategies/g_strategy.py create mode 100644 pyard/strategies/lg_strategy.py create mode 100644 pyard/strategies/p_strategy.py create mode 100644 pyard/strategies/s_strategy.py create mode 100644 pyard/strategies/strategy_factory.py create mode 100644 pyard/strategies/u2_strategy.py create mode 100644 pyard/strategies/w_strategy.py diff --git a/pyard/__init__.py b/pyard/__init__.py index 3c81f16..f9fc780 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -36,7 +36,7 @@ def init( cache_size: int = DEFAULT_CACHE_SIZE, config: dict = None, ): - from .ard import ARD + from .ard_refactored import ARD ard = ARD( imgt_version=imgt_version, diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py index 4b625c2..a610d0d 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -9,9 +9,9 @@ from . 
import smart_sort from .constants import ( HLA_regex, - VALID_REDUCTION_TYPES, DEFAULT_CACHE_SIZE, G_GROUP_LOCI, + VALID_REDUCTION_TYPE, ) from .exceptions import InvalidMACError, InvalidTypingError from .handlers import ( @@ -156,7 +156,7 @@ def __del__(self): @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) def _redux_allele( - self, allele: str, redux_type: VALID_REDUCTION_TYPES, re_ping=True + self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True ) -> str: """Core allele reduction with ping logic""" # Handle HLA- prefix @@ -210,7 +210,7 @@ def _redux_allele( return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str: + def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: """Main redux method using specialized handlers""" # Handle GL string delimiters first processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) diff --git a/pyard/constants.py b/pyard/constants.py index 2d5cefe..438f45e 100644 --- a/pyard/constants.py +++ b/pyard/constants.py @@ -20,12 +20,15 @@ # > http://www.opensource.org/licenses/lgpl-license.php # import re +import typing DEFAULT_CACHE_SIZE = 1_000 HLA_regex = re.compile("^HLA-") -VALID_REDUCTION_TYPES = ("G", "P", "lg", "lgx", "W", "exon", "U2", "S") +VALID_REDUCTION_MODES = ("G", "P", "lg", "lgx", "W", "exon", "U2", "S") +VALID_REDUCTION_TYPE = typing.Literal[VALID_REDUCTION_MODES] + expression_chars = ("N", "Q", "L", "S") # List of P and G characters P_and_G_chars = ("P", "G") diff --git a/pyard/handlers/allele_reducer.py b/pyard/handlers/allele_reducer.py index b070304..6128861 100644 --- a/pyard/handlers/allele_reducer.py +++ b/pyard/handlers/allele_reducer.py @@ -1,69 +1,30 @@ # -*- coding: utf-8 -*- -import functools from typing import TYPE_CHECKING -from ..constants import VALID_REDUCTION_TYPES, expression_chars -from ..exceptions 
import InvalidAlleleError -from ..misc import get_n_field_allele +from ..constants import VALID_REDUCTION_TYPE +from ..strategies.strategy_factory import StrategyFactory if TYPE_CHECKING: from ..ard import ARD class AlleleReducer: - """Handles core allele reduction logic""" + """Handles core allele reduction logic using Strategy Pattern""" def __init__(self, ard_instance: "ARD"): self.ard = ard_instance + self.strategy_factory = StrategyFactory(ard_instance) def reduce_allele( - self, allele: str, redux_type: VALID_REDUCTION_TYPES, re_ping=True + self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True ) -> str: - """Core allele reduction logic extracted from _redux_allele""" - - if redux_type == "G" and allele in self.ard.ars_mappings.g_group: - if allele in self.ard.ars_mappings.dup_g: - return self.ard.ars_mappings.dup_g[allele] - else: - return self.ard.ars_mappings.g_group[allele] - - elif redux_type == "P" and allele in self.ard.ars_mappings.p_group: - return self.ard.ars_mappings.p_group[allele] - - elif redux_type in ["lgx", "lg"]: - if allele in self.ard.ars_mappings.lgx_group: - redux_allele = self.ard.ars_mappings.lgx_group[allele] - else: - redux_allele = ":".join(allele.split(":")[0:2]) - if redux_type == "lg": - return self._add_lg_suffix(redux_allele) - return redux_allele - - elif redux_type == "W": - if self.ard._is_who_allele(allele): - return allele - if allele in self.ard.code_mappings.who_group: - return self.ard.redux( - "/".join(self.ard.code_mappings.who_group[allele]), redux_type - ) - else: - return allele - - elif redux_type == "exon": - return self._handle_exon_reduction(allele) - - elif redux_type == "U2": - return self._handle_u2_reduction(allele) - - elif redux_type == "S": - return self._handle_serology_reduction(allele) - - else: - return self._handle_default_reduction(allele) + """Core allele reduction logic using Strategy Pattern""" + strategy = self.strategy_factory.get_strategy(redux_type) + return 
strategy.reduce(allele) def _add_lg_suffix(self, redux_allele): - """Add lg suffix to reduced allele""" + """Add lg suffix to reduced allele - kept for backward compatibility""" if "/" in redux_allele: return "/".join( [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] @@ -71,77 +32,3 @@ def _add_lg_suffix(self, redux_allele): if self.ard._config["ARS_as_lg"]: return redux_allele + "ARS" return redux_allele + "g" - - def _handle_exon_reduction(self, allele): - """Handle exon reduction type""" - if allele in self.ard.ars_mappings.exon_group: - exon_group_allele = self.ard.ars_mappings.exon_group[allele] - last_char = allele[-1] - if last_char in expression_chars: - exon_short_null_allele = exon_group_allele + last_char - if self.ard.is_shortnull(exon_short_null_allele): - return exon_short_null_allele - return exon_group_allele - else: - w_redux = self.ard.redux(allele, "W") - if w_redux == allele or len(w_redux.split(":")) == 2: - return allele - else: - return self.ard.redux(w_redux, "exon") - - def _handle_u2_reduction(self, allele): - """Handle U2 reduction type""" - allele_fields = allele.split(":") - if len(allele_fields) == 2: - return allele - allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) - if self.ard._is_allele_in_db(allele_2_fields): - return allele_2_fields - else: - return self.reduce_allele(allele, "lgx") - - def _handle_serology_reduction(self, allele): - """Handle serology reduction type""" - from .. 
import db - from ..misc import is_2_field_allele - - if is_2_field_allele(allele): - allele = self.reduce_allele(allele, "lgx") - serology_mapping = db.find_serology_for_allele( - self.ard.db_connection, allele, "lgx_allele_list" - ) - else: - serology_mapping = db.find_serology_for_allele( - self.ard.db_connection, allele - ) - - serology_set = set() - for serology, allele_list in serology_mapping.items(): - if allele in allele_list.split("/"): - serology_set.add(serology) - - if not serology_set and is_2_field_allele(allele): - for serology, allele_list in serology_mapping.items(): - allele_list_lgx = self.ard.redux(allele_list, "lgx") - if allele in allele_list_lgx.split("/"): - serology_set.add(serology) - - return "/".join( - sorted( - serology_set, key=functools.cmp_to_key(self.ard.smart_sort_comparator) - ) - ) - - def _handle_default_reduction(self, allele): - """Handle default reduction cases""" - if allele.endswith("P"): - if allele in self.ard.ars_mappings.p_group.values(): - return allele - elif allele.endswith("G"): - if allele in self.ard.ars_mappings.g_group.values(): - return allele - - if self.ard._is_allele_in_db(allele): - return allele - else: - raise InvalidAlleleError(f"{allele} is an invalid allele.") diff --git a/pyard/handlers/gl_string_processor.py b/pyard/handlers/gl_string_processor.py index add5521..2cdfadd 100644 --- a/pyard/handlers/gl_string_processor.py +++ b/pyard/handlers/gl_string_processor.py @@ -3,7 +3,7 @@ import functools from typing import List, TYPE_CHECKING -from ..constants import VALID_REDUCTION_TYPES +from ..constants import VALID_REDUCTION_TYPE from ..misc import validate_reduction_type if TYPE_CHECKING: @@ -17,7 +17,7 @@ def __init__(self, ard_instance: "ARD"): self.ard = ard_instance def process_gl_string( - self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx" + self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx" ) -> str: """Main GL string processing logic extracted from redux method""" 
validate_reduction_type(redux_type) diff --git a/pyard/misc.py b/pyard/misc.py index b4fc950..9fc4f6c 100644 --- a/pyard/misc.py +++ b/pyard/misc.py @@ -24,7 +24,7 @@ import tempfile from typing import List -from pyard.constants import VALID_REDUCTION_TYPES, expression_chars, P_and_G_chars +from pyard.constants import VALID_REDUCTION_MODES, expression_chars, P_and_G_chars def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str: @@ -151,5 +151,5 @@ def get_default_db_directory(): def validate_reduction_type(ars_type): - if ars_type not in VALID_REDUCTION_TYPES: - raise ValueError(f"Reduction type needs to be one of {VALID_REDUCTION_TYPES}") + if ars_type not in VALID_REDUCTION_MODES: + raise ValueError(f"Reduction type needs to be one of {VALID_REDUCTION_MODES}") diff --git a/pyard/strategies/__init__.py b/pyard/strategies/__init__.py new file mode 100644 index 0000000..f76cf31 --- /dev/null +++ b/pyard/strategies/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +from .base_strategy import ReductionStrategy +from .g_strategy import GGroupStrategy +from .p_strategy import PGroupStrategy +from .lg_strategy import LGStrategy, LGXStrategy +from .w_strategy import WStrategy +from .exon_strategy import ExonStrategy +from .u2_strategy import U2Strategy +from .s_strategy import SStrategy +from .default_strategy import DefaultStrategy +from .strategy_factory import StrategyFactory + +__all__ = [ + "ReductionStrategy", + "GGroupStrategy", + "PGroupStrategy", + "LGStrategy", + "LGXStrategy", + "WStrategy", + "ExonStrategy", + "U2Strategy", + "SStrategy", + "DefaultStrategy", + "StrategyFactory", +] diff --git a/pyard/strategies/base_strategy.py b/pyard/strategies/base_strategy.py new file mode 100644 index 0000000..4b4b8c7 --- /dev/null +++ b/pyard/strategies/base_strategy.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..ard import ARD + + +class 
ReductionStrategy(ABC): + """Base class for all reduction strategies""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + + @abstractmethod + def reduce(self, allele: str) -> str: + """Reduce allele according to this strategy""" + pass diff --git a/pyard/strategies/default_strategy.py b/pyard/strategies/default_strategy.py new file mode 100644 index 0000000..7f95a98 --- /dev/null +++ b/pyard/strategies/default_strategy.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- +from typing_extensions import override + +from .base_strategy import ReductionStrategy + + +class DefaultStrategy(ReductionStrategy): + """Default strategy for handling P/G suffixes and validation""" + + @override + def reduce(self, allele: str) -> str: + # Make this an explicit lookup to the g_group or p_group table + # for stringent validation + if allele.endswith("P"): + if allele in self.ard.ars_mappings.p_group.values(): + return allele + elif allele.endswith("G"): + if allele in self.ard.ars_mappings.g_group.values(): + return allele + + if self.ard._is_allele_in_db(allele): + return allele + else: + from ..exceptions import InvalidAlleleError + + raise InvalidAlleleError(f"{allele} is an invalid allele.") diff --git a/pyard/strategies/exon_strategy.py b/pyard/strategies/exon_strategy.py new file mode 100644 index 0000000..5011e2d --- /dev/null +++ b/pyard/strategies/exon_strategy.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +from typing import override + +from .base_strategy import ReductionStrategy + + +class ExonStrategy(ReductionStrategy): + """Strategy for exon reduction""" + + @override + def reduce(self, allele: str) -> str: + if allele in self.ard.ars_mappings.exon_group: + exon_group_allele = self.ard.ars_mappings.exon_group[allele] + # Check if the 3 field exon allele has a 4 field alleles + # that all have the same expression characters + from ..constants import expression_chars + + last_char = allele[-1] + if last_char in expression_chars: + exon_short_null_allele 
= exon_group_allele + last_char + if self.ard.is_shortnull(exon_short_null_allele): + return exon_short_null_allele + return exon_group_allele + else: + # Expand to W level and then reduce to exon + w_redux = self.ard.redux(allele, "W") + # If the W redux produces 2 field allele or the same allele, don't recurse + if w_redux == allele or len(w_redux.split(":")) == 2: + return allele + else: + # recurse with the W fields + return self.ard.redux(w_redux, "exon") diff --git a/pyard/strategies/g_strategy.py b/pyard/strategies/g_strategy.py new file mode 100644 index 0000000..09a4dc2 --- /dev/null +++ b/pyard/strategies/g_strategy.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +from typing_extensions import override + +from .default_strategy import DefaultStrategy + + +class GGroupStrategy(DefaultStrategy): + """Strategy for G group reduction""" + + @override + def reduce(self, allele: str) -> str: + if allele in self.ard.ars_mappings.g_group: + if allele in self.ard.ars_mappings.dup_g: + return self.ard.ars_mappings.dup_g[allele] + else: + return self.ard.ars_mappings.g_group[allele] + + return super().reduce(allele) diff --git a/pyard/strategies/lg_strategy.py b/pyard/strategies/lg_strategy.py new file mode 100644 index 0000000..46e20b2 --- /dev/null +++ b/pyard/strategies/lg_strategy.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +from typing import override + +from .base_strategy import ReductionStrategy + + +class LGXStrategy(ReductionStrategy): + """Strategy for lgx reduction""" + + @override + def reduce(self, allele: str) -> str: + if allele in self.ard.ars_mappings.lgx_group: + return self.ard.ars_mappings.lgx_group[allele] + else: + # Return allele with only first 2 fields + return ":".join(allele.split(":")[0:2]) + + +class LGStrategy(ReductionStrategy): + """Strategy for lg reduction (lgx + g suffix)""" + + @override + def reduce(self, allele: str) -> str: + lgx_strategy = LGXStrategy(self.ard) + redux_allele = lgx_strategy.reduce(allele) + return 
self._add_lg_suffix(redux_allele) + + def _add_lg_suffix(self, redux_allele: str) -> str: + """Add lg suffix to reduced allele""" + if "/" in redux_allele: + return "/".join( + [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] + ) + if self.ard._config["ARS_as_lg"]: + return redux_allele + "ARS" + return redux_allele + "g" diff --git a/pyard/strategies/p_strategy.py b/pyard/strategies/p_strategy.py new file mode 100644 index 0000000..d11942b --- /dev/null +++ b/pyard/strategies/p_strategy.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- +from typing import override + +from .base_strategy import ReductionStrategy + + +class PGroupStrategy(ReductionStrategy): + """Strategy for P group reduction""" + + @override + def reduce(self, allele: str) -> str: + if allele in self.ard.ars_mappings.p_group: + return self.ard.ars_mappings.p_group[allele] + + return super().reduce(allele) diff --git a/pyard/strategies/s_strategy.py b/pyard/strategies/s_strategy.py new file mode 100644 index 0000000..93cdda6 --- /dev/null +++ b/pyard/strategies/s_strategy.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- + +import functools +from typing import override + +from .base_strategy import ReductionStrategy + + +class SStrategy(ReductionStrategy): + """Strategy for serology reduction""" + + @override + def reduce(self, allele: str) -> str: + from .. 
import db + from ..misc import is_2_field_allele + + # find serology equivalent in serology_mapping + if is_2_field_allele(allele): + allele = self.ard._redux_allele(allele, "lgx") + serology_mapping = db.find_serology_for_allele( + self.ard.db_connection, allele, "lgx_allele_list" + ) + else: + serology_mapping = db.find_serology_for_allele( + self.ard.db_connection, allele + ) + + serology_set = set() + for serology, allele_list in serology_mapping.items(): + if allele in allele_list.split("/"): + serology_set.add(serology) + + if not serology_set and is_2_field_allele(allele): + for serology, allele_list in serology_mapping.items(): + allele_list_lgx = self.ard.redux(allele_list, "lgx") + if allele in allele_list_lgx.split("/"): + serology_set.add(serology) + + return "/".join( + sorted( + serology_set, key=functools.cmp_to_key(self.ard.smart_sort_comparator) + ) + ) diff --git a/pyard/strategies/strategy_factory.py b/pyard/strategies/strategy_factory.py new file mode 100644 index 0000000..7fcb496 --- /dev/null +++ b/pyard/strategies/strategy_factory.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from typing import Dict, TYPE_CHECKING + +from .default_strategy import DefaultStrategy +from .base_strategy import ReductionStrategy +from .exon_strategy import ExonStrategy +from .g_strategy import GGroupStrategy +from .lg_strategy import LGStrategy, LGXStrategy +from .p_strategy import PGroupStrategy +from .s_strategy import SStrategy +from .u2_strategy import U2Strategy +from .w_strategy import WStrategy +from ..constants import VALID_REDUCTION_TYPE + +if TYPE_CHECKING: + from ..ard import ARD + + +class StrategyFactory: + """Factory for creating reduction strategies""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + self._strategies: Dict[str, ReductionStrategy] = {} + self._initialize_strategies() + + def _initialize_strategies(self): + """Initialize all reduction strategies""" + self._strategies = { + "G": GGroupStrategy(self.ard), + 
"P": PGroupStrategy(self.ard), + "lg": LGStrategy(self.ard), + "lgx": LGXStrategy(self.ard), + "W": WStrategy(self.ard), + "exon": ExonStrategy(self.ard), + "U2": U2Strategy(self.ard), + "S": SStrategy(self.ard), + "default": DefaultStrategy(self.ard), + } + + def get_strategy(self, redux_type: VALID_REDUCTION_TYPE) -> ReductionStrategy: + """Get the appropriate strategy for the reduction type""" + return self._strategies.get(redux_type, self._strategies["default"]) diff --git a/pyard/strategies/u2_strategy.py b/pyard/strategies/u2_strategy.py new file mode 100644 index 0000000..d98d61c --- /dev/null +++ b/pyard/strategies/u2_strategy.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- + +from .base_strategy import ReductionStrategy + + +class U2Strategy(ReductionStrategy): + """Strategy for U2 reduction""" + + def reduce(self, allele: str) -> str: + allele_fields = allele.split(":") + # If resolved out to second field leave alone + if len(allele_fields) == 2: + return allele + # If the 2 field reduction is unambiguous, reduce to 2 field level + from ..misc import get_n_field_allele + + allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) + if self.ard._is_allele_in_db(allele_2_fields): + return allele_2_fields + else: + # If ambiguous, reduce to G group level + return self.ard._redux_allele(allele, "lgx") diff --git a/pyard/strategies/w_strategy.py b/pyard/strategies/w_strategy.py new file mode 100644 index 0000000..4eafbe5 --- /dev/null +++ b/pyard/strategies/w_strategy.py @@ -0,0 +1,17 @@ +# -*- coding: utf-8 -*- + +from .base_strategy import ReductionStrategy + + +class WStrategy(ReductionStrategy): + """Strategy for W (WHO) reduction""" + + def reduce(self, allele: str) -> str: + if self.ard._is_who_allele(allele): + return allele + if allele in self.ard.code_mappings.who_group: + return self.ard.redux( + "/".join(self.ard.code_mappings.who_group[allele]), "W" + ) + else: + return allele diff --git a/scripts/pyard b/scripts/pyard index 
d03b7f8..e1fe8f1 100755 --- a/scripts/pyard +++ b/scripts/pyard @@ -22,13 +22,10 @@ # > http://www.opensource.org/licenses/lgpl-license.php # import argparse -import functools import sys -from pyard import smart_sort -from pyard.constants import VALID_REDUCTION_TYPES import pyard.misc -from pyard.db import similar_alleles, similar_mac +from pyard.constants import VALID_REDUCTION_MODES from pyard.exceptions import InvalidAlleleError, InvalidTypingError, InvalidMACError from pyard.misc import get_data_dir, get_imgt_version @@ -113,7 +110,7 @@ if __name__ == "__main__": parser.add_argument( "-r", "--redux-type", - choices=VALID_REDUCTION_TYPES, + choices=VALID_REDUCTION_MODES, dest="redux_type", help="Reduction Method", ) @@ -190,7 +187,7 @@ if __name__ == "__main__": if args.redux_type: print(ard.redux(args.gl_string, args.redux_type)) else: - for redux_type in VALID_REDUCTION_TYPES: + for redux_type in VALID_REDUCTION_MODES: redux_type_info = f"Reduction Method: {redux_type}" print(redux_type_info) print("-" * len(redux_type_info)) From ce1ef2b48823eb75b3986a97314fd2c83469026d Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 11:49:47 -0500 Subject: [PATCH 03/24] Setup handlers first before building database --- pyard/ard_refactored.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py index a610d0d..10d4ac0 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -60,15 +60,15 @@ def __init__( if config: self._config.update(config) - # Initialize database and mappings - self._initialize_database(imgt_version, load_mac) - # Initialize specialized handlers self._initialize_handlers() # Setup caching self._setup_caching(max_cache_size) + # Initialize database and mappings + self._initialize_database(imgt_version, load_mac) + # Freeze reference data for Python >= 3.9 self._freeze_reference_data() From 367710b47c0556482a9fc799537a07b505899f92 Mon Sep 17 00:00:00 2001 
From: Pradeep Bashyal Date: Fri, 17 Oct 2025 12:04:22 -0500 Subject: [PATCH 04/24] Rename Strategy to Reducer --- pyard/handlers/allele_reducer.py | 2 +- pyard/reducers/__init__.py | 26 +++++++++++ .../base_reducer.py} | 2 +- .../default_reducer.py} | 4 +- .../exon_reducer.py} | 4 +- .../g_strategy.py => reducers/g_reducer.py} | 4 +- .../lg_strategy.py => reducers/lg_reducer.py} | 8 ++-- .../p_strategy.py => reducers/p_reducer.py} | 4 +- pyard/reducers/reducer_factory.py | 44 +++++++++++++++++++ .../s_strategy.py => reducers/s_reducer.py} | 4 +- .../u2_strategy.py => reducers/u2_reducer.py} | 4 +- .../w_strategy.py => reducers/w_reducer.py} | 4 +- pyard/strategies/__init__.py | 26 ----------- pyard/strategies/strategy_factory.py | 44 ------------------- 14 files changed, 90 insertions(+), 90 deletions(-) create mode 100644 pyard/reducers/__init__.py rename pyard/{strategies/base_strategy.py => reducers/base_reducer.py} (93%) rename pyard/{strategies/default_strategy.py => reducers/default_reducer.py} (89%) rename pyard/{strategies/exon_strategy.py => reducers/exon_reducer.py} (93%) rename pyard/{strategies/g_strategy.py => reducers/g_reducer.py} (84%) rename pyard/{strategies/lg_strategy.py => reducers/lg_reducer.py} (85%) rename pyard/{strategies/p_strategy.py => reducers/p_reducer.py} (77%) create mode 100644 pyard/reducers/reducer_factory.py rename pyard/{strategies/s_strategy.py => reducers/s_reducer.py} (94%) rename pyard/{strategies/u2_strategy.py => reducers/u2_reducer.py} (89%) rename pyard/{strategies/w_strategy.py => reducers/w_reducer.py} (83%) delete mode 100644 pyard/strategies/__init__.py delete mode 100644 pyard/strategies/strategy_factory.py diff --git a/pyard/handlers/allele_reducer.py b/pyard/handlers/allele_reducer.py index 6128861..693afef 100644 --- a/pyard/handlers/allele_reducer.py +++ b/pyard/handlers/allele_reducer.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING from ..constants import VALID_REDUCTION_TYPE -from 
..strategies.strategy_factory import StrategyFactory +from ..reducers.reducer_factory import StrategyFactory if TYPE_CHECKING: from ..ard import ARD diff --git a/pyard/reducers/__init__.py b/pyard/reducers/__init__.py new file mode 100644 index 0000000..d425226 --- /dev/null +++ b/pyard/reducers/__init__.py @@ -0,0 +1,26 @@ +# -*- coding: utf-8 -*- + +from .base_reducer import Reducer +from .g_reducer import GGroupReducer +from .p_reducer import PGroupReducer +from .lg_reducer import LGReducer, LGXReducer +from .w_reducer import WReducer +from .exon_reducer import ExonReducer +from .u2_reducer import U2Reducer +from .s_reducer import SReducer +from .default_reducer import DefaultReducer +from .reducer_factory import StrategyFactory + +__all__ = [ + "Reducer", + "GGroupReducer", + "PGroupReducer", + "LGReducer", + "LGXReducer", + "WReducer", + "ExonReducer", + "U2Reducer", + "SReducer", + "DefaultReducer", + "StrategyFactory", +] diff --git a/pyard/strategies/base_strategy.py b/pyard/reducers/base_reducer.py similarity index 93% rename from pyard/strategies/base_strategy.py rename to pyard/reducers/base_reducer.py index 4b4b8c7..a2c03f9 100644 --- a/pyard/strategies/base_strategy.py +++ b/pyard/reducers/base_reducer.py @@ -7,7 +7,7 @@ from ..ard import ARD -class ReductionStrategy(ABC): +class Reducer(ABC): """Base class for all reduction strategies""" def __init__(self, ard_instance: "ARD"): diff --git a/pyard/strategies/default_strategy.py b/pyard/reducers/default_reducer.py similarity index 89% rename from pyard/strategies/default_strategy.py rename to pyard/reducers/default_reducer.py index 7f95a98..02b2f01 100644 --- a/pyard/strategies/default_strategy.py +++ b/pyard/reducers/default_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing_extensions import override -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class DefaultStrategy(ReductionStrategy): +class DefaultReducer(Reducer): """Default strategy for handling 
P/G suffixes and validation""" @override diff --git a/pyard/strategies/exon_strategy.py b/pyard/reducers/exon_reducer.py similarity index 93% rename from pyard/strategies/exon_strategy.py rename to pyard/reducers/exon_reducer.py index 5011e2d..bac1fbc 100644 --- a/pyard/strategies/exon_strategy.py +++ b/pyard/reducers/exon_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing import override -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class ExonStrategy(ReductionStrategy): +class ExonReducer(Reducer): """Strategy for exon reduction""" @override diff --git a/pyard/strategies/g_strategy.py b/pyard/reducers/g_reducer.py similarity index 84% rename from pyard/strategies/g_strategy.py rename to pyard/reducers/g_reducer.py index 09a4dc2..c95f2d6 100644 --- a/pyard/strategies/g_strategy.py +++ b/pyard/reducers/g_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing_extensions import override -from .default_strategy import DefaultStrategy +from .default_reducer import DefaultReducer -class GGroupStrategy(DefaultStrategy): +class GGroupReducer(DefaultReducer): """Strategy for G group reduction""" @override diff --git a/pyard/strategies/lg_strategy.py b/pyard/reducers/lg_reducer.py similarity index 85% rename from pyard/strategies/lg_strategy.py rename to pyard/reducers/lg_reducer.py index 46e20b2..30c0f6f 100644 --- a/pyard/strategies/lg_strategy.py +++ b/pyard/reducers/lg_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing import override -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class LGXStrategy(ReductionStrategy): +class LGXReducer(Reducer): """Strategy for lgx reduction""" @override @@ -16,12 +16,12 @@ def reduce(self, allele: str) -> str: return ":".join(allele.split(":")[0:2]) -class LGStrategy(ReductionStrategy): +class LGReducer(Reducer): """Strategy for lg reduction (lgx + g suffix)""" @override def reduce(self, allele: str) -> str: - lgx_strategy = 
LGXStrategy(self.ard) + lgx_strategy = LGXReducer(self.ard) redux_allele = lgx_strategy.reduce(allele) return self._add_lg_suffix(redux_allele) diff --git a/pyard/strategies/p_strategy.py b/pyard/reducers/p_reducer.py similarity index 77% rename from pyard/strategies/p_strategy.py rename to pyard/reducers/p_reducer.py index d11942b..51cb5d8 100644 --- a/pyard/strategies/p_strategy.py +++ b/pyard/reducers/p_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing import override -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class PGroupStrategy(ReductionStrategy): +class PGroupReducer(Reducer): """Strategy for P group reduction""" @override diff --git a/pyard/reducers/reducer_factory.py b/pyard/reducers/reducer_factory.py new file mode 100644 index 0000000..ee1c1c8 --- /dev/null +++ b/pyard/reducers/reducer_factory.py @@ -0,0 +1,44 @@ +# -*- coding: utf-8 -*- + +from typing import Dict, TYPE_CHECKING + +from .default_reducer import DefaultReducer +from .base_reducer import Reducer +from .exon_reducer import ExonReducer +from .g_reducer import GGroupReducer +from .lg_reducer import LGReducer, LGXReducer +from .p_reducer import PGroupReducer +from .s_reducer import SReducer +from .u2_reducer import U2Reducer +from .w_reducer import WReducer +from ..constants import VALID_REDUCTION_TYPE + +if TYPE_CHECKING: + from ..ard import ARD + + +class StrategyFactory: + """Factory for creating reduction strategies""" + + def __init__(self, ard_instance: "ARD"): + self.ard = ard_instance + self._strategies: Dict[str, Reducer] = {} + self._initialize_strategies() + + def _initialize_strategies(self): + """Initialize all reduction strategies""" + self._strategies = { + "G": GGroupReducer(self.ard), + "P": PGroupReducer(self.ard), + "lg": LGReducer(self.ard), + "lgx": LGXReducer(self.ard), + "W": WReducer(self.ard), + "exon": ExonReducer(self.ard), + "U2": U2Reducer(self.ard), + "S": SReducer(self.ard), + "default": 
DefaultReducer(self.ard), + } + + def get_strategy(self, redux_type: VALID_REDUCTION_TYPE) -> Reducer: + """Get the appropriate strategy for the reduction type""" + return self._strategies.get(redux_type, self._strategies["default"]) diff --git a/pyard/strategies/s_strategy.py b/pyard/reducers/s_reducer.py similarity index 94% rename from pyard/strategies/s_strategy.py rename to pyard/reducers/s_reducer.py index 93cdda6..72bd4c3 100644 --- a/pyard/strategies/s_strategy.py +++ b/pyard/reducers/s_reducer.py @@ -3,10 +3,10 @@ import functools from typing import override -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class SStrategy(ReductionStrategy): +class SReducer(Reducer): """Strategy for serology reduction""" @override diff --git a/pyard/strategies/u2_strategy.py b/pyard/reducers/u2_reducer.py similarity index 89% rename from pyard/strategies/u2_strategy.py rename to pyard/reducers/u2_reducer.py index d98d61c..0b0ad2f 100644 --- a/pyard/strategies/u2_strategy.py +++ b/pyard/reducers/u2_reducer.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class U2Strategy(ReductionStrategy): +class U2Reducer(Reducer): """Strategy for U2 reduction""" def reduce(self, allele: str) -> str: diff --git a/pyard/strategies/w_strategy.py b/pyard/reducers/w_reducer.py similarity index 83% rename from pyard/strategies/w_strategy.py rename to pyard/reducers/w_reducer.py index 4eafbe5..e356f45 100644 --- a/pyard/strategies/w_strategy.py +++ b/pyard/reducers/w_reducer.py @@ -1,9 +1,9 @@ # -*- coding: utf-8 -*- -from .base_strategy import ReductionStrategy +from .base_reducer import Reducer -class WStrategy(ReductionStrategy): +class WReducer(Reducer): """Strategy for W (WHO) reduction""" def reduce(self, allele: str) -> str: diff --git a/pyard/strategies/__init__.py b/pyard/strategies/__init__.py deleted file mode 100644 index f76cf31..0000000 --- a/pyard/strategies/__init__.py 
+++ /dev/null @@ -1,26 +0,0 @@ -# -*- coding: utf-8 -*- - -from .base_strategy import ReductionStrategy -from .g_strategy import GGroupStrategy -from .p_strategy import PGroupStrategy -from .lg_strategy import LGStrategy, LGXStrategy -from .w_strategy import WStrategy -from .exon_strategy import ExonStrategy -from .u2_strategy import U2Strategy -from .s_strategy import SStrategy -from .default_strategy import DefaultStrategy -from .strategy_factory import StrategyFactory - -__all__ = [ - "ReductionStrategy", - "GGroupStrategy", - "PGroupStrategy", - "LGStrategy", - "LGXStrategy", - "WStrategy", - "ExonStrategy", - "U2Strategy", - "SStrategy", - "DefaultStrategy", - "StrategyFactory", -] diff --git a/pyard/strategies/strategy_factory.py b/pyard/strategies/strategy_factory.py deleted file mode 100644 index 7fcb496..0000000 --- a/pyard/strategies/strategy_factory.py +++ /dev/null @@ -1,44 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import Dict, TYPE_CHECKING - -from .default_strategy import DefaultStrategy -from .base_strategy import ReductionStrategy -from .exon_strategy import ExonStrategy -from .g_strategy import GGroupStrategy -from .lg_strategy import LGStrategy, LGXStrategy -from .p_strategy import PGroupStrategy -from .s_strategy import SStrategy -from .u2_strategy import U2Strategy -from .w_strategy import WStrategy -from ..constants import VALID_REDUCTION_TYPE - -if TYPE_CHECKING: - from ..ard import ARD - - -class StrategyFactory: - """Factory for creating reduction strategies""" - - def __init__(self, ard_instance: "ARD"): - self.ard = ard_instance - self._strategies: Dict[str, ReductionStrategy] = {} - self._initialize_strategies() - - def _initialize_strategies(self): - """Initialize all reduction strategies""" - self._strategies = { - "G": GGroupStrategy(self.ard), - "P": PGroupStrategy(self.ard), - "lg": LGStrategy(self.ard), - "lgx": LGXStrategy(self.ard), - "W": WStrategy(self.ard), - "exon": ExonStrategy(self.ard), - "U2": U2Strategy(self.ard), - 
"S": SStrategy(self.ard), - "default": DefaultStrategy(self.ard), - } - - def get_strategy(self, redux_type: VALID_REDUCTION_TYPE) -> ReductionStrategy: - """Get the appropriate strategy for the reduction type""" - return self._strategies.get(redux_type, self._strategies["default"]) From f6990ce5521179fdfdbaa5103df7646815de3429 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 12:10:49 -0500 Subject: [PATCH 05/24] Update python-tests action for unit test path --- .github/workflows/python-tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index c0b48eb..03ea709 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -41,6 +41,6 @@ jobs: - name: Run Unit Tests run: | # When run the first time, it'll build the library - python -m unittest tests.test_pyard tests.test_smart_sort + python -m unittest tests.unit.test_pyard tests.unit.test_smart_sort # When run the second time, it should use the already installed library - python -m unittest tests.test_pyard tests.test_smart_sort + python -m unittest tests.unit.test_pyard tests.unit.test_smart_sort From 2593218ee031923d3794a2d05d9175c7c133cbb2 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 12:20:32 -0500 Subject: [PATCH 06/24] =?UTF-8?q?Bump=20version:=201.5.5=20=E2=86=92=202.0?= =?UTF-8?q?.0b1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- api-spec.yaml | 2 +- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 0195989..5a36ef0 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=1.5.5 +ARG PY_ARD_VERSION=2.0.0b1 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git 
a/api-spec.yaml b/api-spec.yaml index 935cfec..c4baca3 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "1.5.5" + version: "2.0.0b1" servers: - url: 'http://localhost:8080' tags: diff --git a/pyard/__init__.py b/pyard/__init__.py index f9fc780..4302114 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -26,7 +26,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "1.5.5" +__version__ = "2.0.0b1" def init( diff --git a/setup.cfg b/setup.cfg index 659797f..5c5d004 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.5.5 +current_version = 2.0.0b1 commit = True tag = True diff --git a/setup.py b/setup.py index 3981ef1..f47f7a5 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="1.5.5", + version="2.0.0b1", description="ARD reduction for HLA with Python", long_description=readme, long_description_content_type="text/markdown", From 52ff6558d1ebd9acad354486dd4fb7239d93aee4 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 14:33:44 -0500 Subject: [PATCH 07/24] Make some private methods to public --- .github/workflows/python-tests.yml | 4 ++-- pyard/ard.py | 6 +++--- pyard/ard_refactored.py | 4 ++-- pyard/handlers/allele_reducer.py | 4 ++-- pyard/handlers/mac_handler.py | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/python-tests.yml b/.github/workflows/python-tests.yml index 03ea709..68e529e 100644 --- a/.github/workflows/python-tests.yml +++ b/.github/workflows/python-tests.yml @@ -41,6 +41,6 @@ jobs: - name: Run Unit Tests run: | # When run the first time, it'll build the library - python -m unittest tests.unit.test_pyard tests.unit.test_smart_sort + pytest # When run the second time, it should use the already installed library - python -m unittest tests.unit.test_pyard 
tests.unit.test_smart_sort + pytest diff --git a/pyard/ard.py b/pyard/ard.py index 95fbf40..d8e7ad4 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -237,7 +237,7 @@ def _redux_allele( ) if "/" in twice_redux_allele: return twice_redux_allele - if self._is_valid_allele(twice_redux_allele): + if self.is_valid_allele(twice_redux_allele): return twice_redux_allele if redux_type == "G" and allele in self.ars_mappings.g_group: @@ -821,11 +821,11 @@ def _is_valid(self, allele: str) -> bool: and not self.is_v2(allele) and not self.is_shortnull(allele) ): - return self._is_valid_allele(allele) + return self.is_valid_allele(allele) return True - def _is_valid_allele(self, allele): + def is_valid_allele(self, allele): """ Is the given allele valid? diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py index 10d4ac0..83bd505 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -181,7 +181,7 @@ def _redux_allele( if allele in self.ars_mappings.p_not_g: not_g_allele = self.ars_mappings.p_not_g[allele] if redux_type == "lg": - return self.allele_reducer._add_lg_suffix(not_g_allele) + return self.allele_reducer.add_lg_suffix(not_g_allele) return not_g_allele else: redux_allele = self._redux_allele(allele, redux_type, False) @@ -280,7 +280,7 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: # Handle MAC if self._config["reduce_MAC"] and code.isalpha(): if self.mac_handler.is_mac(allele): - alleles = self.mac_handler._get_alleles(code, loc_antigen) + alleles = self.mac_handler.get_alleles(code, loc_antigen) if is_hla_prefix: alleles = [f"HLA-{a}" for a in alleles] return self.redux("/".join(alleles), redux_type) diff --git a/pyard/handlers/allele_reducer.py b/pyard/handlers/allele_reducer.py index 693afef..089ddbc 100644 --- a/pyard/handlers/allele_reducer.py +++ b/pyard/handlers/allele_reducer.py @@ -23,11 +23,11 @@ def reduce_allele( strategy = self.strategy_factory.get_strategy(redux_type) return strategy.reduce(allele) 
- def _add_lg_suffix(self, redux_allele): + def add_lg_suffix(self, redux_allele): """Add lg suffix to reduced allele - kept for backward compatibility""" if "/" in redux_allele: return "/".join( - [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] + [self.add_lg_suffix(allele) for allele in redux_allele.split("/")] ) if self.ard._config["ARS_as_lg"]: return redux_allele + "ARS" diff --git a/pyard/handlers/mac_handler.py b/pyard/handlers/mac_handler.py index b0d5a6b..2e2dda9 100644 --- a/pyard/handlers/mac_handler.py +++ b/pyard/handlers/mac_handler.py @@ -48,10 +48,10 @@ def expand_mac(self, mac_code: str) -> str: if HLA_regex.search(mac_code): locus_antigen = locus_antigen.split("-")[1] return "/".join( - ["HLA-" + a for a in self._get_alleles(code, locus_antigen)] + ["HLA-" + a for a in self.get_alleles(code, locus_antigen)] ) else: - return "/".join(self._get_alleles(code, locus_antigen)) + return "/".join(self.get_alleles(code, locus_antigen)) raise InvalidMACError(f"{mac_code} is an invalid MAC.") def lookup_mac(self, allelelist_gl: str) -> str: @@ -89,7 +89,7 @@ def lookup_mac(self, allelelist_gl: str) -> str: raise InvalidMACError(f"{allelelist_gl} does not have a MAC.") - def _get_alleles(self, code, locus_antigen) -> Iterable[str]: + def get_alleles(self, code, locus_antigen) -> Iterable[str]: """Get alleles for MAC code""" alleles = db.mac_code_to_alleles(self.ard.db_connection, code) From c5a1adafba9958072b68ab00070663b8da8236e7 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 15:45:02 -0500 Subject: [PATCH 08/24] Cleanup - Make `is_valid_allele` public - misc cleanup --- pyard/ard_refactored.py | 6 +++--- pyard/reducers/default_reducer.py | 5 ++--- pyard/reducers/exon_reducer.py | 2 +- pyard/reducers/p_reducer.py | 4 ++-- pyard/reducers/s_reducer.py | 5 ++--- pyard/reducers/u2_reducer.py | 2 +- setup.py | 2 ++ 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/pyard/ard_refactored.py 
b/pyard/ard_refactored.py index 83bd505..a9f8482 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -204,7 +204,7 @@ def _redux_allele( ) if "/" in twice_redux_allele: return twice_redux_allele - if self._is_valid_allele(twice_redux_allele): + if self.is_valid_allele(twice_redux_allele): return twice_redux_allele return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) @@ -361,7 +361,7 @@ def _is_who_allele(self, allele: str) -> bool: def _is_allele_in_db(self, allele: str) -> bool: return allele in self.allele_group.alleles - def _is_valid_allele(self, allele: str) -> bool: + def is_valid_allele(self, allele: str) -> bool: if allele.endswith(("P", "G")): allele = allele[:-1] if "*" in allele: @@ -402,7 +402,7 @@ def _is_valid(self, allele: str) -> bool: and not self.is_v2(allele) and not self.is_shortnull(allele) ): - return self._is_valid_allele(allele) + return self.is_valid_allele(allele) return True diff --git a/pyard/reducers/default_reducer.py b/pyard/reducers/default_reducer.py index 02b2f01..d80a63e 100644 --- a/pyard/reducers/default_reducer.py +++ b/pyard/reducers/default_reducer.py @@ -2,6 +2,7 @@ from typing_extensions import override from .base_reducer import Reducer +from ..exceptions import InvalidAlleleError class DefaultReducer(Reducer): @@ -18,9 +19,7 @@ def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.g_group.values(): return allele - if self.ard._is_allele_in_db(allele): + if self.ard.is_valid_allele(allele): return allele else: - from ..exceptions import InvalidAlleleError - raise InvalidAlleleError(f"{allele} is an invalid allele.") diff --git a/pyard/reducers/exon_reducer.py b/pyard/reducers/exon_reducer.py index bac1fbc..e68e929 100644 --- a/pyard/reducers/exon_reducer.py +++ b/pyard/reducers/exon_reducer.py @@ -2,6 +2,7 @@ from typing import override from .base_reducer import Reducer +from ..constants import expression_chars class ExonReducer(Reducer): @@ -13,7 +14,6 @@ def 
reduce(self, allele: str) -> str: exon_group_allele = self.ard.ars_mappings.exon_group[allele] # Check if the 3 field exon allele has a 4 field alleles # that all have the same expression characters - from ..constants import expression_chars last_char = allele[-1] if last_char in expression_chars: diff --git a/pyard/reducers/p_reducer.py b/pyard/reducers/p_reducer.py index 51cb5d8..d2b9de1 100644 --- a/pyard/reducers/p_reducer.py +++ b/pyard/reducers/p_reducer.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- from typing import override -from .base_reducer import Reducer +from .default_reducer import DefaultReducer -class PGroupReducer(Reducer): +class PGroupReducer(DefaultReducer): """Strategy for P group reduction""" @override diff --git a/pyard/reducers/s_reducer.py b/pyard/reducers/s_reducer.py index 72bd4c3..012b77e 100644 --- a/pyard/reducers/s_reducer.py +++ b/pyard/reducers/s_reducer.py @@ -4,6 +4,8 @@ from typing import override from .base_reducer import Reducer +from .. import db +from ..misc import is_2_field_allele class SReducer(Reducer): @@ -11,9 +13,6 @@ class SReducer(Reducer): @override def reduce(self, allele: str) -> str: - from .. 
import db - from ..misc import is_2_field_allele - # find serology equivalent in serology_mapping if is_2_field_allele(allele): allele = self.ard._redux_allele(allele, "lgx") diff --git a/pyard/reducers/u2_reducer.py b/pyard/reducers/u2_reducer.py index 0b0ad2f..bc2f499 100644 --- a/pyard/reducers/u2_reducer.py +++ b/pyard/reducers/u2_reducer.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from .base_reducer import Reducer +from ..misc import get_n_field_allele class U2Reducer(Reducer): @@ -12,7 +13,6 @@ def reduce(self, allele: str) -> str: if len(allele_fields) == 2: return allele # If the 2 field reduction is unambiguous, reduce to 2 field level - from ..misc import get_n_field_allele allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) if self.ard._is_allele_in_db(allele_2_fields): diff --git a/setup.py b/setup.py index f47f7a5..acab889 100644 --- a/setup.py +++ b/setup.py @@ -63,10 +63,12 @@ "Topic :: Scientific/Engineering :: Bio-Informatics", "License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)", "Natural Language :: English", + "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", ], test_suite="tests", tests_require=test_requirements, From a063e4bfb4703768dd788d253f9533ef8f9f95bb Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 16:29:50 -0500 Subject: [PATCH 09/24] Consolidate HLA- prefix handling Clean up imports --- pyard/ard.py | 3 +- pyard/ard_refactored.py | 49 ++++++++++++++---------------- pyard/data_repository.py | 11 ++----- pyard/handlers/__init__.py | 2 +- pyard/handlers/mac_handler.py | 2 +- pyard/handlers/serology_handler.py | 1 + pyard/handlers/v2_handler.py | 1 + pyard/loader/allele_list.py | 7 +++-- pyard/loader/cwd.py | 2 +- pyard/loader/mac_codes.py | 5 +-- pyard/loader/serology.py | 7 
++--- pyard/reducers/__init__.py | 12 ++++---- pyard/reducers/reducer_factory.py | 2 +- pyard/simple_table.py | 2 +- 14 files changed, 50 insertions(+), 56 deletions(-) diff --git a/pyard/ard.py b/pyard/ard.py index d8e7ad4..b024dd3 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -28,11 +28,9 @@ from collections import Counter from typing import Iterable, List, Union -import pyard.serology from . import data_repository as dr from . import db from . import smart_sort -from .serology import SerologyMapping from .constants import ( HLA_regex, VALID_REDUCTION_TYPES, @@ -47,6 +45,7 @@ is_2_field_allele, validate_reduction_type, ) +from .serology import SerologyMapping default_config = { "reduce_serology": True, diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py index a9f8482..678df32 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -159,16 +159,6 @@ def _redux_allele( self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True ) -> str: """Core allele reduction with ping logic""" - # Handle HLA- prefix - if HLA_regex.search(allele): - hla, allele_name = allele.split("-") - redux_allele = self._redux_allele(allele_name, redux_type) - if redux_allele: - if "/" in redux_allele: - return "/".join([f"HLA-{ra}" for ra in redux_allele.split("/")]) - return f"HLA-{redux_allele}" - return redux_allele - if not self._config["strict"]: allele = self._get_non_strict_allele(allele) @@ -217,31 +207,32 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: if processed_gl != glstring or self.is_glstring(processed_gl): return processed_gl + # Remove HLA- prefix for processing the allele + is_hla_prefix = HLA_regex.search(glstring) + if is_hla_prefix: + allele = glstring.split("-")[1] + else: + allele = glstring # Handle ignored allele suffixes if self._config["ignore_allele_with_suffixes"]: - _, fields = glstring.split("*") + _, fields = allele.split("*") if fields in self._config["ignore_allele_with_suffixes"]: - 
return glstring + return allele # Handle V2 to V3 mapping - if self.v2_handler.is_v2(glstring): - glstring = self.v2_handler.map_v2_to_v3(glstring) - return self.redux(glstring, redux_type) + if self.v2_handler.is_v2(allele): + allele = self.v2_handler.map_v2_to_v3(allele) + return self.redux(allele, redux_type) # Handle Serology if self._config["reduce_serology"] and self.serology_handler.is_serology( - glstring + allele ): - alleles = self.serology_handler.get_alleles_from_serology(glstring) + alleles = self.serology_handler.get_alleles_from_serology(allele) if alleles: return self.redux("/".join(alleles), redux_type) return "" - is_hla_prefix = HLA_regex.search(glstring) - if is_hla_prefix: - allele = glstring.split("-")[1] - else: - allele = glstring # Validate format if ":" in allele: loc_allele = allele.split(":") @@ -259,7 +250,7 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: if "*" in allele: locus, _ = allele.split("*") if locus not in G_GROUP_LOCI: - return glstring + return allele raise InvalidTypingError( f"{glstring} is not a valid V2 or Serology typing." 
) @@ -289,11 +280,17 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: # Handle short nulls if self._config["reduce_shortnull"] and self.shortnull_handler.is_shortnull( - glstring + allele ): - return self.redux("/".join(self.shortnulls[glstring]), redux_type) + return self.redux("/".join(self.shortnulls[allele]), redux_type) - return self._redux_allele(glstring, redux_type) + redux_allele = self._redux_allele(allele, redux_type) + # Add back 'HLA-' prefix when redux is done if needed + if is_hla_prefix: + if "/" in redux_allele: + return "/".join([f"HLA-{ra}" for ra in redux_allele.split("/")]) + redux_allele = f"HLA-{redux_allele}" + return redux_allele @staticmethod def is_glstring(gl_string: str) -> bool: diff --git a/pyard/data_repository.py b/pyard/data_repository.py index c8e80b8..c3ba3d7 100644 --- a/pyard/data_repository.py +++ b/pyard/data_repository.py @@ -23,24 +23,18 @@ import copy import functools import sqlite3 -import itertools import pyard.loader import pyard.loader.cwd import pyard.loader.mac_codes import pyard.loader.serology -from pyard.smart_sort import smart_sort_comparator from . 
import db from .constants import expression_chars from .loader.allele_list import load_allele_list +from .loader.g_group import load_g_group +from .loader.p_group import load_p_group from .loader.serology import load_serology_mappings, load_serology_broad_split_mapping from .loader.version import load_latest_version - -from .loader.p_group import load_p_group -from .loader.g_group import load_g_group - -from .simple_table import Table - from .mappings import ( ars_mapping_tables, ARSMapping, @@ -56,6 +50,7 @@ get_1field_allele, ) from .serology import broad_splits_dna_mapping, SerologyMapping +from .simple_table import Table from .smart_sort import smart_sort_comparator diff --git a/pyard/handlers/__init__.py b/pyard/handlers/__init__.py index d9ab5f5..fcc6585 100644 --- a/pyard/handlers/__init__.py +++ b/pyard/handlers/__init__.py @@ -4,9 +4,9 @@ from .gl_string_processor import GLStringProcessor from .mac_handler import MACHandler from .serology_handler import SerologyHandler +from .shortnull_handler import ShortNullHandler from .v2_handler import V2Handler from .xx_handler import XXHandler -from .shortnull_handler import ShortNullHandler __all__ = [ "AlleleReducer", diff --git a/pyard/handlers/mac_handler.py b/pyard/handlers/mac_handler.py index 2e2dda9..cfef3b6 100644 --- a/pyard/handlers/mac_handler.py +++ b/pyard/handlers/mac_handler.py @@ -5,9 +5,9 @@ from collections import Counter from typing import Iterable, TYPE_CHECKING +from .. import db from ..constants import HLA_regex, DEFAULT_CACHE_SIZE from ..exceptions import InvalidMACError -from .. import db if TYPE_CHECKING: from ..ard import ARD diff --git a/pyard/handlers/serology_handler.py b/pyard/handlers/serology_handler.py index a49012a..33778a1 100644 --- a/pyard/handlers/serology_handler.py +++ b/pyard/handlers/serology_handler.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from typing import Iterable, TYPE_CHECKING + from .. 
import db if TYPE_CHECKING: diff --git a/pyard/handlers/v2_handler.py b/pyard/handlers/v2_handler.py index 42168ba..a3106b2 100644 --- a/pyard/handlers/v2_handler.py +++ b/pyard/handlers/v2_handler.py @@ -2,6 +2,7 @@ import re from typing import TYPE_CHECKING + from .. import db if TYPE_CHECKING: diff --git a/pyard/loader/allele_list.py b/pyard/loader/allele_list.py index 2389120..670a336 100644 --- a/pyard/loader/allele_list.py +++ b/pyard/loader/allele_list.py @@ -1,9 +1,10 @@ -from urllib.request import urlopen -from urllib.error import URLError import csv import sys -from ..simple_table import Table +from urllib.error import URLError +from urllib.request import urlopen + from ..loader import IMGT_HLA_URL +from ..simple_table import Table def load_allele_list(imgt_version): diff --git a/pyard/loader/cwd.py b/pyard/loader/cwd.py index 3d17117..090b875 100644 --- a/pyard/loader/cwd.py +++ b/pyard/loader/cwd.py @@ -1,5 +1,5 @@ -import os import csv +import os def load_cwd2(): diff --git a/pyard/loader/mac_codes.py b/pyard/loader/mac_codes.py index 5857b6c..4b320ed 100644 --- a/pyard/loader/mac_codes.py +++ b/pyard/loader/mac_codes.py @@ -1,8 +1,9 @@ +import io import sys +import zipfile from urllib.error import URLError from urllib.request import urlopen -import zipfile -import io + from ..simple_table import Table diff --git a/pyard/loader/serology.py b/pyard/loader/serology.py index 391da7b..f606e92 100644 --- a/pyard/loader/serology.py +++ b/pyard/loader/serology.py @@ -1,9 +1,8 @@ import sys -import csv -import io -from typing import Tuple, Dict, List -from urllib.request import urlopen +from typing import Tuple, List from urllib.error import URLError +from urllib.request import urlopen + from ..simple_table import Table # GitHub URL where IMGT HLA files are downloaded. 
diff --git a/pyard/reducers/__init__.py b/pyard/reducers/__init__.py index d425226..2c1bcaa 100644 --- a/pyard/reducers/__init__.py +++ b/pyard/reducers/__init__.py @@ -1,15 +1,15 @@ # -*- coding: utf-8 -*- from .base_reducer import Reducer +from .default_reducer import DefaultReducer +from .exon_reducer import ExonReducer from .g_reducer import GGroupReducer -from .p_reducer import PGroupReducer from .lg_reducer import LGReducer, LGXReducer -from .w_reducer import WReducer -from .exon_reducer import ExonReducer -from .u2_reducer import U2Reducer -from .s_reducer import SReducer -from .default_reducer import DefaultReducer +from .p_reducer import PGroupReducer from .reducer_factory import StrategyFactory +from .s_reducer import SReducer +from .u2_reducer import U2Reducer +from .w_reducer import WReducer __all__ = [ "Reducer", diff --git a/pyard/reducers/reducer_factory.py b/pyard/reducers/reducer_factory.py index ee1c1c8..e896902 100644 --- a/pyard/reducers/reducer_factory.py +++ b/pyard/reducers/reducer_factory.py @@ -2,8 +2,8 @@ from typing import Dict, TYPE_CHECKING -from .default_reducer import DefaultReducer from .base_reducer import Reducer +from .default_reducer import DefaultReducer from .exon_reducer import ExonReducer from .g_reducer import GGroupReducer from .lg_reducer import LGReducer, LGXReducer diff --git a/pyard/simple_table.py b/pyard/simple_table.py index 118d309..2e71a56 100644 --- a/pyard/simple_table.py +++ b/pyard/simple_table.py @@ -1,6 +1,6 @@ -import sqlite3 import csv import itertools +import sqlite3 from collections import defaultdict from typing import List From 85066e0644162e25aed239749c4c5420f2e98476 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 17:01:02 -0500 Subject: [PATCH 10/24] extract `_redux_non_glstring` method from `redux` --- pyard/ard_refactored.py | 61 +++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 29 deletions(-) diff --git a/pyard/ard_refactored.py 
b/pyard/ard_refactored.py index 678df32..faefcef 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -12,6 +12,7 @@ DEFAULT_CACHE_SIZE, G_GROUP_LOCI, VALID_REDUCTION_TYPE, + expression_chars, ) from .exceptions import InvalidMACError, InvalidTypingError from .handlers import ( @@ -199,24 +200,16 @@ def _redux_allele( return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) - @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: - """Main redux method using specialized handlers""" - # Handle GL string delimiters first - processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) - if processed_gl != glstring or self.is_glstring(processed_gl): - return processed_gl - - # Remove HLA- prefix for processing the allele - is_hla_prefix = HLA_regex.search(glstring) - if is_hla_prefix: - allele = glstring.split("-")[1] - else: - allele = glstring - # Handle ignored allele suffixes - if self._config["ignore_allele_with_suffixes"]: - _, fields = allele.split("*") - if fields in self._config["ignore_allele_with_suffixes"]: + def _redux_non_glstring( + self, allele: str, glstring: str, redux_type: VALID_REDUCTION_TYPE + ): + if "*" in allele: + locus, fields = allele.split("*") + # Handle ignored allele suffixes + if self._config["ignore_allele_with_suffixes"]: + if fields in self._config["ignore_allele_with_suffixes"]: + return allele + if locus not in G_GROUP_LOCI: return allele # Handle V2 to V3 mapping @@ -233,7 +226,7 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: return self.redux("/".join(alleles), redux_type) return "" - # Validate format + # Validate allele format is correct if ":" in allele: loc_allele = allele.split(":") if len(loc_allele) < 2: @@ -247,10 +240,6 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: f"{glstring} is not a valid V2 or Serology typing." 
) else: - if "*" in allele: - locus, _ = allele.split("*") - if locus not in G_GROUP_LOCI: - return allele raise InvalidTypingError( f"{glstring} is not a valid V2 or Serology typing." ) @@ -264,16 +253,12 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: reduced_alleles = self.redux( "/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type ) - if is_hla_prefix: - return "/".join([f"HLA-{a}" for a in reduced_alleles.split("/")]) return reduced_alleles # Handle MAC if self._config["reduce_MAC"] and code.isalpha(): if self.mac_handler.is_mac(allele): alleles = self.mac_handler.get_alleles(code, loc_antigen) - if is_hla_prefix: - alleles = [f"HLA-{a}" for a in alleles] return self.redux("/".join(alleles), redux_type) else: raise InvalidMACError(f"{glstring} is an invalid MAC.") @@ -285,6 +270,26 @@ def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: return self.redux("/".join(self.shortnulls[allele]), redux_type) redux_allele = self._redux_allele(allele, redux_type) + return redux_allele + + @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) + def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: + """Main redux method using specialized handlers""" + + # Handle GL string delimiters first + processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) + if processed_gl != glstring or self.is_glstring(processed_gl): + return processed_gl + + # Remove HLA- prefix for processing the allele + is_hla_prefix = HLA_regex.search(glstring) + if is_hla_prefix: + allele = glstring.split("-")[1] + else: + allele = glstring + + # Handle non GL string + redux_allele = self._redux_non_glstring(allele, glstring, redux_type) # Add back 'HLA-' prefix when redux is done if needed if is_hla_prefix: if "/" in redux_allele: @@ -341,8 +346,6 @@ def validate(self, glstring: str) -> bool: def _get_non_strict_allele(self, allele: str) -> str: """Handle non-strict allele validation""" - from 
.constants import expression_chars - if not self._is_allele_in_db(allele): for expr_char in expression_chars: if self._is_allele_in_db(allele + expr_char): From 32360560bcb5c6c3ac613a5725399595b3a09bf4 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 17 Oct 2025 17:11:51 -0500 Subject: [PATCH 11/24] =?UTF-8?q?Bump=20version:=202.0.0b1=20=E2=86=92=202?= =?UTF-8?q?.0.0b2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Dockerfile | 2 +- api-spec.yaml | 2 +- pyard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5a36ef0..a9391c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,7 @@ LABEL MAINTAINER="Pradeep Bashyal" WORKDIR /app -ARG PY_ARD_VERSION=2.0.0b1 +ARG PY_ARD_VERSION=2.0.0b2 COPY requirements.txt /app RUN pip install --no-cache-dir --upgrade pip && \ diff --git a/api-spec.yaml b/api-spec.yaml index c4baca3..8582657 100644 --- a/api-spec.yaml +++ b/api-spec.yaml @@ -2,7 +2,7 @@ openapi: 3.0.3 info: title: ARD Reduction description: Reduce to ARD Level - version: "2.0.0b1" + version: "2.0.0b2" servers: - url: 'http://localhost:8080' tags: diff --git a/pyard/__init__.py b/pyard/__init__.py index 4302114..8d6bc44 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -26,7 +26,7 @@ from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" -__version__ = "2.0.0b1" +__version__ = "2.0.0b2" def init( diff --git a/setup.cfg b/setup.cfg index 5c5d004..1444266 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0.0b1 +current_version = 2.0.0b2 commit = True tag = True diff --git a/setup.py b/setup.py index acab889..a55f702 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ setup( name="py-ard", - version="2.0.0b1", + version="2.0.0b2", description="ARD reduction for HLA with Python", long_description=readme, 
long_description_content_type="text/markdown", From 681760be3d04370a5280ad2e31fb0e16a8b8fd63 Mon Sep 17 00:00:00 2001 From: pbashyal-nmdp Date: Tue, 21 Oct 2025 17:24:37 +0000 Subject: [PATCH 12/24] Update changelog for 2.0.0b1 This commit was created by changelog-from-release in 'Post release' CI workflow --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d28c177..81bf254 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -776,4 +776,4 @@ yes [0.0.15]: https://github.com/nmdp-bioinformatics/py-ard/compare/0.0.14...0.0.15 [0.0.14]: https://github.com/nmdp-bioinformatics/py-ard/tree/0.0.14 - + From b310ee65e23de43b70d9a7674aef78639046585c Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 29 Oct 2025 10:57:00 -0500 Subject: [PATCH 13/24] @override is only available for Python >= 3.12 --- pyard/reducers/default_reducer.py | 2 +- pyard/reducers/exon_reducer.py | 2 +- pyard/reducers/g_reducer.py | 2 +- pyard/reducers/lg_reducer.py | 4 ++-- pyard/reducers/p_reducer.py | 2 +- pyard/reducers/s_reducer.py | 2 +- pyard/reducers/u2_reducer.py | 1 + pyard/reducers/w_reducer.py | 1 + 8 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pyard/reducers/default_reducer.py b/pyard/reducers/default_reducer.py index d80a63e..6c163a9 100644 --- a/pyard/reducers/default_reducer.py +++ b/pyard/reducers/default_reducer.py @@ -8,7 +8,7 @@ class DefaultReducer(Reducer): """Default strategy for handling P/G suffixes and validation""" - @override + # @override def reduce(self, allele: str) -> str: # Make this an explicit lookup to the g_group or p_group table # for stringent validation diff --git a/pyard/reducers/exon_reducer.py b/pyard/reducers/exon_reducer.py index e68e929..e687ea7 100644 --- a/pyard/reducers/exon_reducer.py +++ b/pyard/reducers/exon_reducer.py @@ -8,7 +8,7 @@ class ExonReducer(Reducer): """Strategy for exon reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele 
in self.ard.ars_mappings.exon_group: exon_group_allele = self.ard.ars_mappings.exon_group[allele] diff --git a/pyard/reducers/g_reducer.py b/pyard/reducers/g_reducer.py index c95f2d6..9a0488e 100644 --- a/pyard/reducers/g_reducer.py +++ b/pyard/reducers/g_reducer.py @@ -7,7 +7,7 @@ class GGroupReducer(DefaultReducer): """Strategy for G group reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.g_group: if allele in self.ard.ars_mappings.dup_g: diff --git a/pyard/reducers/lg_reducer.py b/pyard/reducers/lg_reducer.py index 30c0f6f..fc2688b 100644 --- a/pyard/reducers/lg_reducer.py +++ b/pyard/reducers/lg_reducer.py @@ -7,7 +7,7 @@ class LGXReducer(Reducer): """Strategy for lgx reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.lgx_group: return self.ard.ars_mappings.lgx_group[allele] @@ -19,7 +19,7 @@ def reduce(self, allele: str) -> str: class LGReducer(Reducer): """Strategy for lg reduction (lgx + g suffix)""" - @override + # @override def reduce(self, allele: str) -> str: lgx_strategy = LGXReducer(self.ard) redux_allele = lgx_strategy.reduce(allele) diff --git a/pyard/reducers/p_reducer.py b/pyard/reducers/p_reducer.py index d2b9de1..7869455 100644 --- a/pyard/reducers/p_reducer.py +++ b/pyard/reducers/p_reducer.py @@ -7,7 +7,7 @@ class PGroupReducer(DefaultReducer): """Strategy for P group reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.p_group: return self.ard.ars_mappings.p_group[allele] diff --git a/pyard/reducers/s_reducer.py b/pyard/reducers/s_reducer.py index 012b77e..4bb05ef 100644 --- a/pyard/reducers/s_reducer.py +++ b/pyard/reducers/s_reducer.py @@ -11,7 +11,7 @@ class SReducer(Reducer): """Strategy for serology reduction""" - @override + # @override def reduce(self, allele: str) -> str: # find serology equivalent in serology_mapping if is_2_field_allele(allele): diff 
--git a/pyard/reducers/u2_reducer.py b/pyard/reducers/u2_reducer.py index bc2f499..141c5ab 100644 --- a/pyard/reducers/u2_reducer.py +++ b/pyard/reducers/u2_reducer.py @@ -7,6 +7,7 @@ class U2Reducer(Reducer): """Strategy for U2 reduction""" + # @override def reduce(self, allele: str) -> str: allele_fields = allele.split(":") # If resolved out to second field leave alone diff --git a/pyard/reducers/w_reducer.py b/pyard/reducers/w_reducer.py index e356f45..de45ef8 100644 --- a/pyard/reducers/w_reducer.py +++ b/pyard/reducers/w_reducer.py @@ -6,6 +6,7 @@ class WReducer(Reducer): """Strategy for W (WHO) reduction""" + # @override def reduce(self, allele: str) -> str: if self.ard._is_who_allele(allele): return allele From ada59b0796cc66f0d23f470705c80e11910778f6 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 29 Oct 2025 10:57:00 -0500 Subject: [PATCH 14/24] @override is only available for Python >= 3.12 --- pyard/reducers/default_reducer.py | 2 +- pyard/reducers/exon_reducer.py | 2 +- pyard/reducers/g_reducer.py | 2 +- pyard/reducers/lg_reducer.py | 4 ++-- pyard/reducers/p_reducer.py | 2 +- pyard/reducers/s_reducer.py | 2 +- pyard/reducers/u2_reducer.py | 1 + pyard/reducers/w_reducer.py | 1 + 8 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pyard/reducers/default_reducer.py b/pyard/reducers/default_reducer.py index d80a63e..6c163a9 100644 --- a/pyard/reducers/default_reducer.py +++ b/pyard/reducers/default_reducer.py @@ -8,7 +8,7 @@ class DefaultReducer(Reducer): """Default strategy for handling P/G suffixes and validation""" - @override + # @override def reduce(self, allele: str) -> str: # Make this an explicit lookup to the g_group or p_group table # for stringent validation diff --git a/pyard/reducers/exon_reducer.py b/pyard/reducers/exon_reducer.py index e68e929..e687ea7 100644 --- a/pyard/reducers/exon_reducer.py +++ b/pyard/reducers/exon_reducer.py @@ -8,7 +8,7 @@ class ExonReducer(Reducer): """Strategy for exon reduction""" - @override + 
# @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.exon_group: exon_group_allele = self.ard.ars_mappings.exon_group[allele] diff --git a/pyard/reducers/g_reducer.py b/pyard/reducers/g_reducer.py index c95f2d6..9a0488e 100644 --- a/pyard/reducers/g_reducer.py +++ b/pyard/reducers/g_reducer.py @@ -7,7 +7,7 @@ class GGroupReducer(DefaultReducer): """Strategy for G group reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.g_group: if allele in self.ard.ars_mappings.dup_g: diff --git a/pyard/reducers/lg_reducer.py b/pyard/reducers/lg_reducer.py index 30c0f6f..fc2688b 100644 --- a/pyard/reducers/lg_reducer.py +++ b/pyard/reducers/lg_reducer.py @@ -7,7 +7,7 @@ class LGXReducer(Reducer): """Strategy for lgx reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.lgx_group: return self.ard.ars_mappings.lgx_group[allele] @@ -19,7 +19,7 @@ def reduce(self, allele: str) -> str: class LGReducer(Reducer): """Strategy for lg reduction (lgx + g suffix)""" - @override + # @override def reduce(self, allele: str) -> str: lgx_strategy = LGXReducer(self.ard) redux_allele = lgx_strategy.reduce(allele) diff --git a/pyard/reducers/p_reducer.py b/pyard/reducers/p_reducer.py index d2b9de1..7869455 100644 --- a/pyard/reducers/p_reducer.py +++ b/pyard/reducers/p_reducer.py @@ -7,7 +7,7 @@ class PGroupReducer(DefaultReducer): """Strategy for P group reduction""" - @override + # @override def reduce(self, allele: str) -> str: if allele in self.ard.ars_mappings.p_group: return self.ard.ars_mappings.p_group[allele] diff --git a/pyard/reducers/s_reducer.py b/pyard/reducers/s_reducer.py index 012b77e..4bb05ef 100644 --- a/pyard/reducers/s_reducer.py +++ b/pyard/reducers/s_reducer.py @@ -11,7 +11,7 @@ class SReducer(Reducer): """Strategy for serology reduction""" - @override + # @override def reduce(self, allele: str) -> str: # find serology equivalent 
in serology_mapping if is_2_field_allele(allele): diff --git a/pyard/reducers/u2_reducer.py b/pyard/reducers/u2_reducer.py index bc2f499..141c5ab 100644 --- a/pyard/reducers/u2_reducer.py +++ b/pyard/reducers/u2_reducer.py @@ -7,6 +7,7 @@ class U2Reducer(Reducer): """Strategy for U2 reduction""" + # @override def reduce(self, allele: str) -> str: allele_fields = allele.split(":") # If resolved out to second field leave alone diff --git a/pyard/reducers/w_reducer.py b/pyard/reducers/w_reducer.py index e356f45..de45ef8 100644 --- a/pyard/reducers/w_reducer.py +++ b/pyard/reducers/w_reducer.py @@ -6,6 +6,7 @@ class WReducer(Reducer): """Strategy for W (WHO) reduction""" + # @override def reduce(self, allele: str) -> str: if self.ard._is_who_allele(allele): return allele From bfb286e7c3cc0e1840c037c9d72fbb16c01ad0ad Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 29 Oct 2025 11:08:26 -0500 Subject: [PATCH 15/24] Remove @override --- pyard/reducers/base_reducer.py | 4 ---- pyard/reducers/default_reducer.py | 2 -- pyard/reducers/exon_reducer.py | 2 -- pyard/reducers/g_reducer.py | 2 -- pyard/reducers/lg_reducer.py | 2 -- pyard/reducers/p_reducer.py | 1 - pyard/reducers/reducer_factory.py | 5 +---- pyard/reducers/s_reducer.py | 1 - 8 files changed, 1 insertion(+), 18 deletions(-) diff --git a/pyard/reducers/base_reducer.py b/pyard/reducers/base_reducer.py index a2c03f9..9a74953 100644 --- a/pyard/reducers/base_reducer.py +++ b/pyard/reducers/base_reducer.py @@ -1,10 +1,6 @@ # -*- coding: utf-8 -*- from abc import ABC, abstractmethod -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from ..ard import ARD class Reducer(ABC): diff --git a/pyard/reducers/default_reducer.py b/pyard/reducers/default_reducer.py index 6c163a9..cf848a6 100644 --- a/pyard/reducers/default_reducer.py +++ b/pyard/reducers/default_reducer.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from typing_extensions import override - from .base_reducer import Reducer from ..exceptions import 
InvalidAlleleError diff --git a/pyard/reducers/exon_reducer.py b/pyard/reducers/exon_reducer.py index e687ea7..8cb1c05 100644 --- a/pyard/reducers/exon_reducer.py +++ b/pyard/reducers/exon_reducer.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from typing import override - from .base_reducer import Reducer from ..constants import expression_chars diff --git a/pyard/reducers/g_reducer.py b/pyard/reducers/g_reducer.py index 9a0488e..d6b2b1f 100644 --- a/pyard/reducers/g_reducer.py +++ b/pyard/reducers/g_reducer.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from typing_extensions import override - from .default_reducer import DefaultReducer diff --git a/pyard/reducers/lg_reducer.py b/pyard/reducers/lg_reducer.py index fc2688b..d01580c 100644 --- a/pyard/reducers/lg_reducer.py +++ b/pyard/reducers/lg_reducer.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -from typing import override - from .base_reducer import Reducer diff --git a/pyard/reducers/p_reducer.py b/pyard/reducers/p_reducer.py index 7869455..968f425 100644 --- a/pyard/reducers/p_reducer.py +++ b/pyard/reducers/p_reducer.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- -from typing import override from .default_reducer import DefaultReducer diff --git a/pyard/reducers/reducer_factory.py b/pyard/reducers/reducer_factory.py index e896902..3b0e3cf 100644 --- a/pyard/reducers/reducer_factory.py +++ b/pyard/reducers/reducer_factory.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from typing import Dict, TYPE_CHECKING +from typing import Dict from .base_reducer import Reducer from .default_reducer import DefaultReducer @@ -13,9 +13,6 @@ from .w_reducer import WReducer from ..constants import VALID_REDUCTION_TYPE -if TYPE_CHECKING: - from ..ard import ARD - class StrategyFactory: """Factory for creating reduction strategies""" diff --git a/pyard/reducers/s_reducer.py b/pyard/reducers/s_reducer.py index 4bb05ef..258470e 100644 --- a/pyard/reducers/s_reducer.py +++ b/pyard/reducers/s_reducer.py @@ -1,7 +1,6 @@ # -*- coding: utf-8 -*- 
import functools -from typing import override from .base_reducer import Reducer from .. import db From f8df7ad96987a8b883aa7f46a6899acc9798803f Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 29 Oct 2025 11:28:10 -0500 Subject: [PATCH 16/24] added check for TYPE_CHECKING --- pyard/__init__.py | 4 +++- pyard/db.py | 6 +++--- pyard/reducers/base_reducer.py | 5 +++++ pyard/reducers/reducer_factory.py | 5 +++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pyard/__init__.py b/pyard/__init__.py index 8d6bc44..5b80192 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -21,8 +21,10 @@ # > http://www.fsf.org/licensing/licenses/lgpl.html # > http://www.opensource.org/licenses/lgpl-license.php # -from .blender import blender as dr_blender from .constants import DEFAULT_CACHE_SIZE + +# exports for `pyard` +from .blender import blender as dr_blender from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" diff --git a/pyard/db.py b/pyard/db.py index 87d2ec8..bc26918 100644 --- a/pyard/db.py +++ b/pyard/db.py @@ -420,7 +420,7 @@ def find_xx_for_serology(connection: sqlite3.Connection, serology: str) -> str: :param serology: serology for which to find XX allele :return: XX allele for given serology """ - query = f"SELECT xx FROM serology_mapping WHERE serology = ?" + query = "SELECT xx FROM serology_mapping WHERE serology = ?" 
cursor = connection.execute(query, (serology,)) results = cursor.fetchone() if results: @@ -581,7 +581,7 @@ def save_serology_mappings(db_connection, sero_mapping): # Drop the table first cursor.execute("DROP TABLE IF EXISTS serology_mapping") # Create table - create_table_sql = f"""CREATE TABLE serology_mapping ( + create_table_sql = """CREATE TABLE serology_mapping ( serology TEXT PRIMARY KEY, allele_list TEXT, lgx_allele_list TEXT, @@ -592,7 +592,7 @@ def save_serology_mappings(db_connection, sero_mapping): rows = ((k, v[0], v[1], v[2]) for k, v in sero_mapping.items()) # insert - cursor.executemany(f"INSERT INTO serology_mapping VALUES (?, ?, ?, ?)", rows) + cursor.executemany("INSERT INTO serology_mapping VALUES (?, ?, ?, ?)", rows) # commit transaction - writes to the db db_connection.commit() diff --git a/pyard/reducers/base_reducer.py b/pyard/reducers/base_reducer.py index 9a74953..38ee66d 100644 --- a/pyard/reducers/base_reducer.py +++ b/pyard/reducers/base_reducer.py @@ -2,6 +2,11 @@ from abc import ABC, abstractmethod +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..ard import ARD + class Reducer(ABC): """Base class for all reduction strategies""" diff --git a/pyard/reducers/reducer_factory.py b/pyard/reducers/reducer_factory.py index 3b0e3cf..6c48d00 100644 --- a/pyard/reducers/reducer_factory.py +++ b/pyard/reducers/reducer_factory.py @@ -13,6 +13,11 @@ from .w_reducer import WReducer from ..constants import VALID_REDUCTION_TYPE +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..ard import ARD + class StrategyFactory: """Factory for creating reduction strategies""" From c0021c22ea90785df03f0ea56bfe482437d4a6ff Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 29 Oct 2025 12:16:04 -0500 Subject: [PATCH 17/24] Update setup.py - Use `find_packages` to get all subpackages --- setup.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index a55f702..59f595a 100644 --- a/setup.py 
+++ b/setup.py @@ -23,7 +23,7 @@ # > http://www.opensource.org/licenses/lgpl-license.php # -from setuptools import setup +from setuptools import setup, find_packages with open("README.md") as readme_file: readme = readme_file.read() @@ -43,9 +43,7 @@ author="CIBMTR", author_email="cibmtr-pypi@nmdp.org", url="https://github.com/nmdp-bioinformatics/py-ard", - packages=[ - "pyard", - ], + packages=find_packages(include=["pyard", "pyard.*"]), provides=["pyard"], scripts=[ "scripts/pyard", From 6b4186855a8c689545896d1845beed57a17ad9c9 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Fri, 31 Oct 2025 15:10:24 -0500 Subject: [PATCH 18/24] Tests for reducers --- tests/unit/reducers/__init__.py | 1 + tests/unit/reducers/test_all_reducers.py | 90 ++++++++++++++++ tests/unit/reducers/test_base_reducer.py | 34 +++++++ tests/unit/reducers/test_default_reducer.py | 60 +++++++++++ tests/unit/reducers/test_exon_reducer.py | 81 +++++++++++++++ tests/unit/reducers/test_g_reducer.py | 42 ++++++++ tests/unit/reducers/test_lg_reducer.py | 63 ++++++++++++ tests/unit/reducers/test_p_reducer.py | 32 ++++++ tests/unit/reducers/test_reducer_factory.py | 107 ++++++++++++++++++++ tests/unit/reducers/test_s_reducer.py | 66 ++++++++++++ tests/unit/reducers/test_u2_reducer.py | 47 +++++++++ tests/unit/reducers/test_w_reducer.py | 49 +++++++++ 12 files changed, 672 insertions(+) create mode 100644 tests/unit/reducers/__init__.py create mode 100644 tests/unit/reducers/test_all_reducers.py create mode 100644 tests/unit/reducers/test_base_reducer.py create mode 100644 tests/unit/reducers/test_default_reducer.py create mode 100644 tests/unit/reducers/test_exon_reducer.py create mode 100644 tests/unit/reducers/test_g_reducer.py create mode 100644 tests/unit/reducers/test_lg_reducer.py create mode 100644 tests/unit/reducers/test_p_reducer.py create mode 100644 tests/unit/reducers/test_reducer_factory.py create mode 100644 tests/unit/reducers/test_s_reducer.py create mode 100644 
tests/unit/reducers/test_u2_reducer.py create mode 100644 tests/unit/reducers/test_w_reducer.py diff --git a/tests/unit/reducers/__init__.py b/tests/unit/reducers/__init__.py new file mode 100644 index 0000000..40a96af --- /dev/null +++ b/tests/unit/reducers/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/tests/unit/reducers/test_all_reducers.py b/tests/unit/reducers/test_all_reducers.py new file mode 100644 index 0000000..f4e0236 --- /dev/null +++ b/tests/unit/reducers/test_all_reducers.py @@ -0,0 +1,90 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers import ( + Reducer, + GGroupReducer, + PGroupReducer, + LGReducer, + LGXReducer, + WReducer, + ExonReducer, + U2Reducer, + SReducer, + DefaultReducer, + StrategyFactory, +) + + +@pytest.fixture +def mock_ard(): + """Create comprehensive mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.g_group = {"A*01:01": "A*01:01:01G"} + ard.ars_mappings.p_group = {"A*01:01": "A*01:01P"} + ard.ars_mappings.lgx_group = {"A*01:01:01": "A*01:01"} + ard.ars_mappings.exon_group = {"A*01:01:01": "A*01:01:01"} + ard.ars_mappings.dup_g = {} + ard.code_mappings = Mock() + ard.code_mappings.who_group = {} + ard.db_connection = Mock() + ard._config = {"ARS_as_lg": False} + ard._is_allele_in_db = Mock(return_value=True) + ard._is_who_allele = Mock(return_value=False) + ard._redux_allele = Mock() + ard.redux = Mock() + ard.is_shortnull = Mock(return_value=False) + ard.smart_sort_comparator = Mock() + return ard + + +def test_all_reducers_inherit_from_base(mock_ard): + """Test that all reducer classes inherit from Reducer base class""" + reducers = [ + GGroupReducer(mock_ard), + PGroupReducer(mock_ard), + LGReducer(mock_ard), + LGXReducer(mock_ard), + WReducer(mock_ard), + ExonReducer(mock_ard), + U2Reducer(mock_ard), + SReducer(mock_ard), + DefaultReducer(mock_ard), + ] + + for reducer in reducers: + assert isinstance(reducer, Reducer) + assert 
hasattr(reducer, "reduce") + assert hasattr(reducer, "ard") + + +def test_strategy_factory_creates_all_reducers(mock_ard): + """Test that StrategyFactory can create all reducer types""" + factory = StrategyFactory(mock_ard) + + strategies = ["G", "P", "lg", "lgx", "W", "exon", "U2", "S", "default"] + + for strategy_type in strategies: + strategy = factory.get_strategy(strategy_type) + assert isinstance(strategy, Reducer) + assert strategy.ard == mock_ard + + +def test_all_reducers_have_ard_instance(mock_ard): + """Test that all reducers store ARD instance correctly""" + reducers = [ + GGroupReducer(mock_ard), + PGroupReducer(mock_ard), + LGReducer(mock_ard), + LGXReducer(mock_ard), + WReducer(mock_ard), + ExonReducer(mock_ard), + U2Reducer(mock_ard), + SReducer(mock_ard), + DefaultReducer(mock_ard), + ] + + for reducer in reducers: + assert reducer.ard == mock_ard diff --git a/tests/unit/reducers/test_base_reducer.py b/tests/unit/reducers/test_base_reducer.py new file mode 100644 index 0000000..0712ddf --- /dev/null +++ b/tests/unit/reducers/test_base_reducer.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.base_reducer import Reducer + + +class ConcreteReducer(Reducer): + """Concrete implementation for testing abstract base class""" + + def reduce(self, allele: str) -> str: + return f"reduced_{allele}" + + +def test_reducer_initialization(): + """Test that Reducer can be initialized with ARD instance""" + mock_ard = Mock() + reducer = ConcreteReducer(mock_ard) + assert reducer.ard == mock_ard + + +def test_reducer_abstract_method(): + """Test that reduce method is implemented in concrete class""" + mock_ard = Mock() + reducer = ConcreteReducer(mock_ard) + result = reducer.reduce("A*01:01") + assert result == "reduced_A*01:01" + + +def test_reducer_cannot_be_instantiated(): + """Test that abstract Reducer class cannot be instantiated directly""" + mock_ard = Mock() + with pytest.raises(TypeError): + 
Reducer(mock_ard) diff --git a/tests/unit/reducers/test_default_reducer.py b/tests/unit/reducers/test_default_reducer.py new file mode 100644 index 0000000..c82a2a1 --- /dev/null +++ b/tests/unit/reducers/test_default_reducer.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.default_reducer import DefaultReducer +from pyard.exceptions import InvalidAlleleError + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.p_group = Mock() + ard.ars_mappings.g_group = Mock() + ard.is_valid_allele = Mock() + return ard + + +def test_reduce_p_group_allele(mock_ard): + """Test reduction of P group allele""" + mock_ard.ars_mappings.p_group.values.return_value = ["A*01:01P"] + + reducer = DefaultReducer(mock_ard) + result = reducer.reduce("A*01:01P") + + assert result == "A*01:01P" + + +def test_reduce_g_group_allele(mock_ard): + """Test reduction of G group allele""" + mock_ard.ars_mappings.g_group.values.return_value = ["A*01:01G"] + + reducer = DefaultReducer(mock_ard) + result = reducer.reduce("A*01:01G") + + assert result == "A*01:01G" + + +def test_reduce_valid_allele_in_db(mock_ard): + """Test reduction of valid allele in database""" + mock_ard.is_valid_allele.return_value = True + + reducer = DefaultReducer(mock_ard) + result = reducer.reduce("A*01:01") + + assert result == "A*01:01" + mock_ard.is_valid_allele.assert_called_once_with("A*01:01") + + +def test_reduce_invalid_allele_raises_error(mock_ard): + """Test that invalid allele raises InvalidAlleleError""" + mock_ard.ars_mappings.p_group.values.return_value = [] + mock_ard.ars_mappings.g_group.values.return_value = [] + mock_ard.is_valid_allele.return_value = False + + reducer = DefaultReducer(mock_ard) + + with pytest.raises(InvalidAlleleError, match="INVALID is an invalid allele"): + reducer.reduce("INVALID") diff --git a/tests/unit/reducers/test_exon_reducer.py 
b/tests/unit/reducers/test_exon_reducer.py new file mode 100644 index 0000000..03115cc --- /dev/null +++ b/tests/unit/reducers/test_exon_reducer.py @@ -0,0 +1,81 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch +from pyard.reducers.exon_reducer import ExonReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.exon_group = {"A*01:01:01": "A*01:01:01"} + ard.is_shortnull = Mock() + ard.redux = Mock() + return ard + + +def test_reduce_allele_in_exon_group(mock_ard): + """Test reduction of allele in exon group mapping""" + reducer = ExonReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A*01:01:01" + + +def test_reduce_allele_with_expression_char_shortnull(mock_ard): + """Test reduction of allele with expression character that is shortnull""" + mock_ard.ars_mappings.exon_group = {"A*01:01:01N": "A*01:01:01"} + mock_ard.is_shortnull.return_value = True + + with patch("pyard.reducers.exon_reducer.expression_chars", "N"): + reducer = ExonReducer(mock_ard) + result = reducer.reduce("A*01:01:01N") + + assert result == "A*01:01:01N" + mock_ard.is_shortnull.assert_called_once_with("A*01:01:01N") + + +def test_reduce_allele_with_expression_char_not_shortnull(mock_ard): + """Test reduction of allele with expression character that is not shortnull""" + mock_ard.ars_mappings.exon_group = {"A*01:01:01Q": "A*01:01:01"} + mock_ard.is_shortnull.return_value = False + + with patch("pyard.reducers.exon_reducer.expression_chars", "Q"): + reducer = ExonReducer(mock_ard) + result = reducer.reduce("A*01:01:01Q") + + assert result == "A*01:01:01" + + +def test_reduce_allele_not_in_exon_group_w_redux_same(mock_ard): + """Test reduction when W redux returns same allele""" + mock_ard.redux.return_value = "B*07:02" + + reducer = ExonReducer(mock_ard) + result = reducer.reduce("B*07:02") + + assert result == "B*07:02" + 
mock_ard.redux.assert_called_once_with("B*07:02", "W") + + +def test_reduce_allele_not_in_exon_group_w_redux_two_field(mock_ard): + """Test reduction when W redux returns 2-field allele""" + mock_ard.redux.return_value = "B*07:02" + + reducer = ExonReducer(mock_ard) + result = reducer.reduce("B*07:02:01") + + assert result == "B*07:02:01" + + +def test_reduce_allele_not_in_exon_group_recursive(mock_ard): + """Test recursive reduction when W redux returns different allele""" + mock_ard.redux.side_effect = ["B*07:02:01:01", "B*07:02:01"] + + reducer = ExonReducer(mock_ard) + result = reducer.reduce("B*07:02:XX") + + assert result == "B*07:02:01" + assert mock_ard.redux.call_count == 2 diff --git a/tests/unit/reducers/test_g_reducer.py b/tests/unit/reducers/test_g_reducer.py new file mode 100644 index 0000000..f965253 --- /dev/null +++ b/tests/unit/reducers/test_g_reducer.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.g_reducer import GGroupReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.g_group = {"A*01:01": "A*01:01:01G", "A*02:01": "A*02:01:02G"} + ard.ars_mappings.dup_g = {"A*02:01": "A*02:01:01G/A*02:01:02G"} + ard.ars_mappings.p_group = Mock() + ard.is_valid_allele = Mock(return_value=True) + return ard + + +def test_reduce_allele_in_g_group(mock_ard): + """Test reduction of allele in G group mapping""" + reducer = GGroupReducer(mock_ard) + result = reducer.reduce("A*01:01") + + assert result == "A*01:01:01G" + + +def test_reduce_allele_in_dup_g(mock_ard): + """Test reduction of allele in duplicate G group mapping""" + reducer = GGroupReducer(mock_ard) + result = reducer.reduce("A*02:01") + + assert result == "A*02:01:01G/A*02:01:02G" + + +def test_reduce_allele_not_in_g_group_calls_super(mock_ard): + """Test that allele not in G group calls parent reduce method""" + reducer = GGroupReducer(mock_ard) + 
result = reducer.reduce("B*07:02") + + assert result == "B*07:02" + mock_ard.is_valid_allele.assert_called_once_with("B*07:02") diff --git a/tests/unit/reducers/test_lg_reducer.py b/tests/unit/reducers/test_lg_reducer.py new file mode 100644 index 0000000..802b2a6 --- /dev/null +++ b/tests/unit/reducers/test_lg_reducer.py @@ -0,0 +1,63 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.lg_reducer import LGXReducer, LGReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.lgx_group = {"A*01:01:01": "A*01:01"} + ard._config = {"ARS_as_lg": False} + return ard + + +class TestLGXReducer: + def test_reduce_allele_in_lgx_group(self, mock_ard): + """Test reduction of allele in LGX group mapping""" + reducer = LGXReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A*01:01" + + def test_reduce_allele_not_in_lgx_group(self, mock_ard): + """Test reduction of allele not in LGX group returns first 2 fields""" + reducer = LGXReducer(mock_ard) + result = reducer.reduce("B*07:02:01:03") + + assert result == "B*07:02" + + def test_reduce_two_field_allele(self, mock_ard): + """Test reduction of already 2-field allele""" + reducer = LGXReducer(mock_ard) + result = reducer.reduce("C*01:02") + + assert result == "C*01:02" + + +class TestLGReducer: + def test_reduce_single_allele_with_g_suffix(self, mock_ard): + """Test reduction adds 'g' suffix to single allele""" + reducer = LGReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A*01:01g" + + def test_reduce_single_allele_with_ars_suffix(self, mock_ard): + """Test reduction adds 'ARS' suffix when configured""" + mock_ard._config = {"ARS_as_lg": True} + reducer = LGReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A*01:01ARS" + + def test_reduce_multiple_alleles_with_g_suffix(self, mock_ard): + """Test reduction adds 'g' 
suffix to multiple alleles""" + mock_ard.ars_mappings.lgx_group = {"A*01:01:01": "A*01:01/A*01:02"} + reducer = LGReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A*01:01g/A*01:02g" diff --git a/tests/unit/reducers/test_p_reducer.py b/tests/unit/reducers/test_p_reducer.py new file mode 100644 index 0000000..3fdd149 --- /dev/null +++ b/tests/unit/reducers/test_p_reducer.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.p_reducer import PGroupReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.ars_mappings = Mock() + ard.ars_mappings.p_group = {"A*01:01": "A*01:01P"} + ard.is_valid_allele = Mock(return_value=True) + return ard + + +def test_reduce_allele_in_p_group(mock_ard): + """Test reduction of allele in P group mapping""" + reducer = PGroupReducer(mock_ard) + result = reducer.reduce("A*01:01") + + assert result == "A*01:01P" + + +def test_reduce_allele_not_in_p_group_calls_super(mock_ard): + """Test that allele not in P group calls parent reduce method""" + reducer = PGroupReducer(mock_ard) + result = reducer.reduce("B*07:02") + + assert result == "B*07:02" + mock_ard.is_valid_allele.assert_called_once_with("B*07:02") diff --git a/tests/unit/reducers/test_reducer_factory.py b/tests/unit/reducers/test_reducer_factory.py new file mode 100644 index 0000000..fdece86 --- /dev/null +++ b/tests/unit/reducers/test_reducer_factory.py @@ -0,0 +1,107 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.reducer_factory import StrategyFactory +from pyard.reducers.g_reducer import GGroupReducer +from pyard.reducers.p_reducer import PGroupReducer +from pyard.reducers.lg_reducer import LGReducer, LGXReducer +from pyard.reducers.w_reducer import WReducer +from pyard.reducers.exon_reducer import ExonReducer +from pyard.reducers.u2_reducer import U2Reducer +from pyard.reducers.s_reducer import 
SReducer +from pyard.reducers.default_reducer import DefaultReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + return Mock() + + +def test_strategy_factory_initialization(mock_ard): + """Test that StrategyFactory initializes with all strategies""" + factory = StrategyFactory(mock_ard) + + assert factory.ard == mock_ard + assert len(factory._strategies) == 9 + + +def test_get_g_strategy(mock_ard): + """Test getting G group strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("G") + + assert isinstance(strategy, GGroupReducer) + + +def test_get_p_strategy(mock_ard): + """Test getting P group strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("P") + + assert isinstance(strategy, PGroupReducer) + + +def test_get_lg_strategy(mock_ard): + """Test getting lg strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("lg") + + assert isinstance(strategy, LGReducer) + + +def test_get_lgx_strategy(mock_ard): + """Test getting lgx strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("lgx") + + assert isinstance(strategy, LGXReducer) + + +def test_get_w_strategy(mock_ard): + """Test getting W strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("W") + + assert isinstance(strategy, WReducer) + + +def test_get_exon_strategy(mock_ard): + """Test getting exon strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("exon") + + assert isinstance(strategy, ExonReducer) + + +def test_get_u2_strategy(mock_ard): + """Test getting U2 strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("U2") + + assert isinstance(strategy, U2Reducer) + + +def test_get_s_strategy(mock_ard): + """Test getting S strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("S") + + assert isinstance(strategy, SReducer) + + +def 
test_get_default_strategy(mock_ard): + """Test getting default strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("default") + + assert isinstance(strategy, DefaultReducer) + + +def test_get_unknown_strategy_returns_default(mock_ard): + """Test that unknown strategy returns default strategy""" + factory = StrategyFactory(mock_ard) + strategy = factory.get_strategy("UNKNOWN") + + assert isinstance(strategy, DefaultReducer) diff --git a/tests/unit/reducers/test_s_reducer.py b/tests/unit/reducers/test_s_reducer.py new file mode 100644 index 0000000..22626e0 --- /dev/null +++ b/tests/unit/reducers/test_s_reducer.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch +from pyard.reducers.s_reducer import SReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard.db_connection = Mock() + ard._redux_allele = Mock() + ard.redux = Mock() + ard.smart_sort_comparator = Mock() + return ard + + +def test_reduce_two_field_allele(mock_ard): + """Test reduction of 2-field allele""" + mock_ard._redux_allele.return_value = "A*01:01" + mock_ard.smart_sort_comparator.return_value = 0 + + serology_mapping = {"A1": "A*01:01/A*01:02"} + + with patch("pyard.reducers.s_reducer.is_2_field_allele", return_value=True), patch( + "pyard.reducers.s_reducer.db.find_serology_for_allele", + return_value=serology_mapping, + ), patch("functools.cmp_to_key"): + reducer = SReducer(mock_ard) + result = reducer.reduce("A*01:01") + + assert result == "A1" + + +def test_reduce_non_two_field_allele(mock_ard): + """Test reduction of non-2-field allele""" + mock_ard.smart_sort_comparator.return_value = 0 + + serology_mapping = {"A1": "A*01:01:01/A*01:02:01"} + + with patch("pyard.reducers.s_reducer.is_2_field_allele", return_value=False), patch( + "pyard.reducers.s_reducer.db.find_serology_for_allele", + return_value=serology_mapping, + ), patch("functools.cmp_to_key"): + reducer = 
SReducer(mock_ard) + result = reducer.reduce("A*01:01:01") + + assert result == "A1" + + +def test_reduce_multiple_serology_matches(mock_ard): + """Test reduction with multiple serology matches""" + mock_ard.smart_sort_comparator.return_value = 0 + + serology_mapping = {"A1": "A*01:01/A*01:02", "A36": "A*01:01/A*36:01"} + + with patch("pyard.reducers.s_reducer.is_2_field_allele", return_value=False), patch( + "pyard.reducers.s_reducer.db.find_serology_for_allele", + return_value=serology_mapping, + ), patch("functools.cmp_to_key", side_effect=lambda x: x): + reducer = SReducer(mock_ard) + result = reducer.reduce("A*01:01") + + # Should return sorted serology codes + assert "A1" in result and "A36" in result diff --git a/tests/unit/reducers/test_u2_reducer.py b/tests/unit/reducers/test_u2_reducer.py new file mode 100644 index 0000000..8422a2d --- /dev/null +++ b/tests/unit/reducers/test_u2_reducer.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch +from pyard.reducers.u2_reducer import U2Reducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard._is_allele_in_db = Mock() + ard._redux_allele = Mock() + return ard + + +def test_reduce_two_field_allele_unchanged(mock_ard): + """Test that 2-field allele is returned unchanged""" + reducer = U2Reducer(mock_ard) + result = reducer.reduce("A*01:01") + + assert result == "A*01:01" + + +def test_reduce_multi_field_allele_unambiguous(mock_ard): + """Test reduction of multi-field allele that is unambiguous at 2-field level""" + mock_ard._is_allele_in_db.return_value = True + + with patch("pyard.reducers.u2_reducer.get_n_field_allele", return_value="A*01:01"): + reducer = U2Reducer(mock_ard) + result = reducer.reduce("A*01:01:01:01") + + assert result == "A*01:01" + mock_ard._is_allele_in_db.assert_called_once_with("A*01:01") + + +def test_reduce_multi_field_allele_ambiguous(mock_ard): + """Test reduction of multi-field allele that is 
ambiguous at 2-field level""" + mock_ard._is_allele_in_db.return_value = False + mock_ard._redux_allele.return_value = "A*01:01" + + with patch("pyard.reducers.u2_reducer.get_n_field_allele", return_value="A*01:01"): + reducer = U2Reducer(mock_ard) + result = reducer.reduce("A*01:01:01:01") + + assert result == "A*01:01" + mock_ard._redux_allele.assert_called_once_with("A*01:01:01:01", "lgx") diff --git a/tests/unit/reducers/test_w_reducer.py b/tests/unit/reducers/test_w_reducer.py new file mode 100644 index 0000000..72b3091 --- /dev/null +++ b/tests/unit/reducers/test_w_reducer.py @@ -0,0 +1,49 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock +from pyard.reducers.w_reducer import WReducer + + +@pytest.fixture +def mock_ard(): + """Create mock ARD instance""" + ard = Mock() + ard._is_who_allele = Mock() + ard.code_mappings = Mock() + ard.code_mappings.who_group = {"A*01:XX": ["A*01:01", "A*01:02"]} + ard.redux = Mock() + return ard + + +def test_reduce_who_allele_returns_unchanged(mock_ard): + """Test that WHO allele is returned unchanged""" + mock_ard._is_who_allele.return_value = True + + reducer = WReducer(mock_ard) + result = reducer.reduce("A*01:01:01:01") + + assert result == "A*01:01:01:01" + mock_ard._is_who_allele.assert_called_once_with("A*01:01:01:01") + + +def test_reduce_allele_in_who_group(mock_ard): + """Test reduction of allele in WHO group mapping""" + mock_ard._is_who_allele.return_value = False + mock_ard.redux.return_value = "A*01:01/A*01:02" + + reducer = WReducer(mock_ard) + result = reducer.reduce("A*01:XX") + + assert result == "A*01:01/A*01:02" + mock_ard.redux.assert_called_once_with("A*01:01/A*01:02", "W") + + +def test_reduce_allele_not_in_who_group(mock_ard): + """Test that allele not in WHO group is returned unchanged""" + mock_ard._is_who_allele.return_value = False + + reducer = WReducer(mock_ard) + result = reducer.reduce("B*07:02") + + assert result == "B*07:02" From 
26a7dd0745e867d75870f0ceb4c222d9e1318b40 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 3 Nov 2025 13:26:19 -0600 Subject: [PATCH 19/24] Document modules - Document all the modules in loader/reducers and handlers packages --- pyard/ard_refactored.py | 8 +- pyard/handlers/__init__.py | 8 +- pyard/handlers/allele_handler.py | 73 +++++++++++++++++ pyard/handlers/allele_reducer.py | 34 -------- pyard/handlers/gl_string_processor.py | 80 ++++++++++++++++-- pyard/handlers/mac_handler.py | 90 +++++++++++++++++++-- pyard/handlers/serology_handler.py | 84 +++++++++++++++++-- pyard/handlers/shortnull_handler.py | 42 +++++++++- pyard/handlers/v2_handler.py | 88 +++++++++++++++++--- pyard/handlers/xx_handler.py | 31 ++++++- pyard/loader/g_group.py | 30 ++++++- pyard/loader/p_group.py | 22 +++++ pyard/loader/serology.py | 18 +++++ pyard/reducers/base_reducer.py | 98 +++++++++++++++++++++- pyard/reducers/default_reducer.py | 86 +++++++++++++++++++- pyard/reducers/exon_reducer.py | 87 ++++++++++++++++++-- pyard/reducers/g_reducer.py | 61 +++++++++++++- pyard/reducers/lg_reducer.py | 112 ++++++++++++++++++++++++-- pyard/reducers/p_reducer.py | 64 ++++++++++++++- pyard/reducers/s_reducer.py | 77 +++++++++++++++++- pyard/reducers/u2_reducer.py | 76 +++++++++++++++-- pyard/reducers/w_reducer.py | 82 +++++++++++++++++-- 22 files changed, 1238 insertions(+), 113 deletions(-) create mode 100644 pyard/handlers/allele_handler.py delete mode 100644 pyard/handlers/allele_reducer.py diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py index faefcef..6f43db7 100644 --- a/pyard/ard_refactored.py +++ b/pyard/ard_refactored.py @@ -16,8 +16,8 @@ ) from .exceptions import InvalidMACError, InvalidTypingError from .handlers import ( - AlleleReducer, - GLStringProcessor, + AlleleHandler, + GLStringHandler, MACHandler, SerologyHandler, V2Handler, @@ -118,8 +118,8 @@ def _initialize_database(self, imgt_version: str, load_mac: bool): def _initialize_handlers(self): """Initialize all 
specialized handlers""" - self.allele_reducer = AlleleReducer(self) - self.gl_processor = GLStringProcessor(self) + self.allele_reducer = AlleleHandler(self) + self.gl_processor = GLStringHandler(self) self.mac_handler = MACHandler(self) self.serology_handler = SerologyHandler(self) self.v2_handler = V2Handler(self) diff --git a/pyard/handlers/__init__.py b/pyard/handlers/__init__.py index fcc6585..dc1ec0b 100644 --- a/pyard/handlers/__init__.py +++ b/pyard/handlers/__init__.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- -from .allele_reducer import AlleleReducer -from .gl_string_processor import GLStringProcessor +from .allele_handler import AlleleHandler +from .gl_string_processor import GLStringHandler from .mac_handler import MACHandler from .serology_handler import SerologyHandler from .shortnull_handler import ShortNullHandler @@ -9,8 +9,8 @@ from .xx_handler import XXHandler __all__ = [ - "AlleleReducer", - "GLStringProcessor", + "AlleleHandler", + "GLStringHandler", "MACHandler", "SerologyHandler", "V2Handler", diff --git a/pyard/handlers/allele_handler.py b/pyard/handlers/allele_handler.py new file mode 100644 index 0000000..7afa0c2 --- /dev/null +++ b/pyard/handlers/allele_handler.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +from typing import TYPE_CHECKING + +from ..constants import VALID_REDUCTION_TYPE +from ..reducers.reducer_factory import StrategyFactory + +if TYPE_CHECKING: + from ..ard import ARD + + +class AlleleHandler: + """Handles core allele reduction logic using Strategy Pattern + + This class serves as the main handler for reducing HLA alleles to different + resolution levels (G group, P group, lg, etc.). It uses the Strategy Pattern + to delegate the actual reduction logic to specific strategy classes. 
+ """ + + def __init__(self, ard_instance: "ARD"): + """Initialize the AlleleReducer with an ARD instance + + Args: + ard_instance: The main ARD object containing database connections + and configuration settings + """ + self.ard = ard_instance + # Factory that creates appropriate reduction strategy based on redux_type + self.strategy_factory = StrategyFactory(ard_instance) + + def reduce_allele( + self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True + ) -> str: + """Core allele reduction logic using Strategy Pattern + + Reduces an HLA allele to the specified resolution level by delegating + to the appropriate reduction strategy. + + Args: + allele: HLA allele string to reduce (e.g., "A*01:01:01:01") + redux_type: Type of reduction to perform (G, P, lg, lgx, W, exon, U2, S) + re_ping: Whether to re-ping for P groups when G groups are unavailable + + Returns: + Reduced allele string according to the specified redux_type + """ + # Get the appropriate reduction strategy for the redux_type + strategy = self.strategy_factory.get_strategy(redux_type) + # Execute the reduction using the selected strategy + return strategy.reduce(allele) + + def add_lg_suffix(self, redux_allele): + """Add lg suffix to reduced allele - kept for backward compatibility + + Appends the appropriate suffix ('g' or 'ARS') to reduced alleles. + Handles both single alleles and ambiguous allele lists separated by '/'. 
+ + Args: + redux_allele: Reduced allele string, may contain multiple alleles + separated by '/' + + Returns: + Allele string with appropriate suffix added to each allele + """ + # Handle ambiguous alleles (multiple alleles separated by '/') + if "/" in redux_allele: + return "/".join( + [self.add_lg_suffix(allele) for allele in redux_allele.split("/")] + ) + # Use 'ARS' suffix if configured, otherwise use 'g' suffix + if self.ard._config["ARS_as_lg"]: + return redux_allele + "ARS" + return redux_allele + "g" diff --git a/pyard/handlers/allele_reducer.py b/pyard/handlers/allele_reducer.py deleted file mode 100644 index 089ddbc..0000000 --- a/pyard/handlers/allele_reducer.py +++ /dev/null @@ -1,34 +0,0 @@ -# -*- coding: utf-8 -*- - -from typing import TYPE_CHECKING - -from ..constants import VALID_REDUCTION_TYPE -from ..reducers.reducer_factory import StrategyFactory - -if TYPE_CHECKING: - from ..ard import ARD - - -class AlleleReducer: - """Handles core allele reduction logic using Strategy Pattern""" - - def __init__(self, ard_instance: "ARD"): - self.ard = ard_instance - self.strategy_factory = StrategyFactory(ard_instance) - - def reduce_allele( - self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True - ) -> str: - """Core allele reduction logic using Strategy Pattern""" - strategy = self.strategy_factory.get_strategy(redux_type) - return strategy.reduce(allele) - - def add_lg_suffix(self, redux_allele): - """Add lg suffix to reduced allele - kept for backward compatibility""" - if "/" in redux_allele: - return "/".join( - [self.add_lg_suffix(allele) for allele in redux_allele.split("/")] - ) - if self.ard._config["ARS_as_lg"]: - return redux_allele + "ARS" - return redux_allele + "g" diff --git a/pyard/handlers/gl_string_processor.py b/pyard/handlers/gl_string_processor.py index 2cdfadd..17c1f22 100644 --- a/pyard/handlers/gl_string_processor.py +++ b/pyard/handlers/gl_string_processor.py @@ -10,54 +10,104 @@ from ..ard import ARD -class 
GLStringProcessor: - """Handles GL string parsing, validation and processing""" +class GLStringHandler: + """Handles GL string parsing, validation and processing + + GL (Genotype List) strings represent HLA typing data using standardized + delimiters to express ambiguity and relationships between alleles. + This class processes these complex strings by parsing delimiters and + applying reductions to individual components. + """ def __init__(self, ard_instance: "ARD"): + """Initialize the GLStringHandler with an ARD instance + + Args: + ard_instance: The main ARD object for database access and configuration + """ self.ard = ard_instance def process_gl_string( self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx" ) -> str: - """Main GL string processing logic extracted from redux method""" + """Main GL string processing logic extracted from redux method + + Processes GL strings by parsing delimiters in order of precedence + and applying reductions to individual components. GL string delimiters: + ^ = unphased genotype list + | = phased genotype list + + = allele list (multiple alleles at same locus) + ~ = possible allele list + / = ambiguous allele list + + Args: + glstring: GL string to process (e.g., "A*01:01+A*02:01^B*07:02") + redux_type: Type of reduction to apply to each component + + Returns: + Processed GL string with reductions applied + """ validate_reduction_type(redux_type) + # Validate GL string structure if strict mode is enabled if self.ard._config["strict"]: self.validate_gl_string(glstring) - # Handle GL string delimiters + # Handle GL string delimiters in order of precedence + # Unphased genotype list (highest precedence) if "^" in glstring: return self._sorted_unique_gl( [self.ard.redux(a, redux_type) for a in glstring.split("^")], "^" ) + # Phased genotype list if "|" in glstring: return self._sorted_unique_gl( [self.ard.redux(a, redux_type) for a in glstring.split("|")], "|" ) + # Allele list (multiple alleles at same locus) if "+" in 
glstring: return self._sorted_unique_gl( [self.ard.redux(a, redux_type) for a in glstring.split("+")], "+" ) + # Possible allele list if "~" in glstring: return self._sorted_unique_gl( [self.ard.redux(a, redux_type) for a in glstring.split("~")], "~" ) + # Ambiguous allele list (lowest precedence) if "/" in glstring: return self._sorted_unique_gl( [self.ard.redux(a, redux_type) for a in glstring.split("/")], "/" ) + # Single allele - return as-is for further processing return glstring def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: - """Make a list of sorted unique GL Strings separated by delim""" + """Make a list of sorted unique GL Strings separated by delim + + Creates a sorted, deduplicated list of GL string components. + Different delimiters have different sorting behaviors: + - '~' preserves original order (no sorting/deduplication) + - '+' sorts but keeps structure intact + - Others flatten, deduplicate, and sort + + Args: + gls: List of GL string components to process + delim: Delimiter to use for joining results + + Returns: + Sorted and deduplicated GL string components joined by delimiter + """ + # Possible allele list (~) preserves original order if delim == "~": return delim.join(gls) + # Allele list (+) sorts but maintains structure if delim == "+": non_empty_gls = filter(lambda s: s != "", gls) return delim.join( @@ -71,6 +121,7 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: ) ) + # Other delimiters: flatten, deduplicate, and sort all_gls = [] for gl in gls: all_gls += gl.split(delim) @@ -87,7 +138,22 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: ) def validate_gl_string(self, glstring: str) -> bool: - """Validate GL string structure and components""" + """Validate GL string structure and components + + Recursively validates GL string by parsing delimiters and checking + that all leaf components (individual alleles) are valid according + to the ARD database. 
+ + Args: + glstring: GL string to validate + + Returns: + True if all components are valid + + Raises: + InvalidAlleleError: If any component allele is invalid + """ + # Recursively validate components separated by each delimiter type if "^" in glstring: return all(map(self.validate_gl_string, glstring.split("^"))) if "|" in glstring: @@ -99,7 +165,7 @@ def validate_gl_string(self, glstring: str) -> bool: if "/" in glstring: return all(map(self.validate_gl_string, glstring.split("/"))) - # what falls through here is an allele + # Base case: validate individual allele against database is_valid_allele = self.ard._is_valid(glstring) if not is_valid_allele: from ..exceptions import InvalidAlleleError diff --git a/pyard/handlers/mac_handler.py b/pyard/handlers/mac_handler.py index cfef3b6..8b651f6 100644 --- a/pyard/handlers/mac_handler.py +++ b/pyard/handlers/mac_handler.py @@ -14,23 +14,51 @@ class MACHandler: - """Handles MAC (Multiple Allele Code) operations""" + """Handles MAC (Multiple Allele Code) operations + + MAC codes are shorthand representations for groups of HLA alleles that + share common characteristics. This class provides functionality to: + - Validate MAC codes + - Expand MAC codes to their constituent alleles + - Find MAC codes for given allele lists + """ def __init__(self, ard_instance: "ARD"): + """Initialize the MACHandler with an ARD instance + + Args: + ard_instance: The main ARD object for database access + """ self.ard = ard_instance @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) def is_mac(self, allele: str) -> bool: - """Check if allele is a valid MAC code""" + """Check if allele is a valid MAC code + + MAC codes have the format 'LOCUS*ANTIGEN:CODE' where CODE is alphabetic. + Validates by checking if the code exists in the database and if the + antigen group matches the provided locus. 
+ + Args: + allele: String to check (e.g., 'A*01:AB', 'B*15:XX') + + Returns: + True if the string is a valid MAC code, False otherwise + """ + # MAC codes must contain a colon separator if ":" in allele: allele_split = allele.split(":") if len(allele_split) == 2: locus_antigen, code = allele_split + # MAC codes have alphabetic suffixes (not numeric) if code.isalpha(): try: + # Query database for alleles associated with this MAC code alleles = db.mac_code_to_alleles(self.ard.db_connection, code) if alleles: + # Check if MAC expands to full allele names (contains ':') if any(map(lambda a: ":" in a, alleles)): + # Validate that the antigen group matches antigen_groups = map(lambda a: a.split(":")[0], alleles) antigen_counts = Counter(antigen_groups) valid_antigen = antigen_counts.most_common(1).pop()[0] @@ -42,24 +70,55 @@ def is_mac(self, allele: str) -> bool: return False def expand_mac(self, mac_code: str) -> str: - """Expand MAC code into GL string of alleles""" + """Expand MAC code into GL string of alleles + + Converts a MAC code into its constituent alleles as a GL string + with '/' delimiters. Handles both HLA-prefixed and non-prefixed formats. 
+ + Args: + mac_code: MAC code to expand (e.g., 'A*01:AB', 'HLA-A*01:AB') + + Returns: + GL string of alleles separated by '/' (e.g., 'A*01:01/A*01:02') + + Raises: + InvalidMACError: If the MAC code is invalid + """ if self.is_mac(mac_code): locus_antigen, code = mac_code.split(":") + # Handle HLA-prefixed format if HLA_regex.search(mac_code): locus_antigen = locus_antigen.split("-")[1] return "/".join( ["HLA-" + a for a in self.get_alleles(code, locus_antigen)] ) else: + # Handle standard format without HLA prefix return "/".join(self.get_alleles(code, locus_antigen)) raise InvalidMACError(f"{mac_code} is an invalid MAC.") def lookup_mac(self, allelelist_gl: str) -> str: - """Find MAC code corresponding to allele list""" + """Find MAC code corresponding to allele list + + Searches for a MAC code that represents the given list of alleles. + Tries multiple strategies: single antigen group optimization, + original order, and sorted order. + + Args: + allelelist_gl: GL string of alleles separated by '/' + (e.g., 'A*01:01/A*01:02/A*01:03') + + Returns: + MAC code representing the allele list (e.g., 'A*01:AB') + + Raises: + InvalidMACError: If no MAC code exists for the allele list + """ alleles = allelelist_gl.split("/") allele_fields = [allele.split("*")[1] for allele in alleles] antigen_groups = sorted({allele.split(":")[0] for allele in allele_fields}) + # Optimization: if all alleles share same antigen group, use field suffixes only if len(antigen_groups) == 1: mac_expansion = "/".join( sorted({allele.split(":")[1] for allele in allele_fields}) @@ -69,14 +128,14 @@ def lookup_mac(self, allelelist_gl: str) -> str: locus = allelelist_gl.split("*")[0] return f"{locus}*{antigen_groups[0]}:{mac_code}" - # Try given list order + # Strategy 1: Try alleles in given order mac_expansion = "/".join(allele_fields) mac_code = db.alleles_to_mac_code(self.ard.db_connection, mac_expansion) if mac_code: locus = allelelist_gl.split("*")[0] return 
f"{locus}*{antigen_groups[0]}:{mac_code}" - # Try sorted list + # Strategy 2: Try alleles in sorted order mac_expansion = "/".join( sorted( allele_fields, key=functools.cmp_to_key(self.ard.smart_sort_comparator) @@ -90,14 +149,31 @@ def lookup_mac(self, allelelist_gl: str) -> str: raise InvalidMACError(f"{allelelist_gl} does not have a MAC.") def get_alleles(self, code, locus_antigen) -> Iterable[str]: - """Get alleles for MAC code""" + """Get alleles for MAC code + + Retrieves the list of alleles that a MAC code represents from the database. + Handles two formats: full allele expansions and field suffix expansions. + + Args: + code: MAC code suffix (e.g., 'AB', 'XX') + locus_antigen: Locus and antigen part (e.g., 'A*01', 'B*15') + + Returns: + List of alleles that the MAC code represents, filtered to only + include alleles present in the current database + """ + # Query database for alleles associated with this MAC code alleles = db.mac_code_to_alleles(self.ard.db_connection, code) + # Check if MAC expands to full allele names (contains ':') is_allelic_expansion = any([":" in allele for allele in alleles]) if is_allelic_expansion: + # Full allele format: prepend locus only locus = locus_antigen.split("*")[0] alleles = [f"{locus}*{a}" for a in alleles] else: + # Field suffix format: append to locus_antigen alleles = [f"{locus_antigen}:{a}" for a in alleles] + # Filter to only include alleles that exist in current database return list(filter(self.ard._is_allele_in_db, alleles)) diff --git a/pyard/handlers/serology_handler.py b/pyard/handlers/serology_handler.py index 33778a1..6e27054 100644 --- a/pyard/handlers/serology_handler.py +++ b/pyard/handlers/serology_handler.py @@ -9,32 +9,104 @@ class SerologyHandler: - """Handles serology-related operations""" + """Handles serology-related operations + + Serology refers to the historical method of HLA typing using antibodies + to detect cell surface antigens. 
This class provides functionality to: + - Validate serological typing designations + - Convert between serology and molecular allele representations + - Handle broad/split antigen relationships + """ def __init__(self, ard_instance: "ARD"): + """Initialize the SerologyHandler with an ARD instance + + Args: + ard_instance: The main ARD object for database access and serology mappings + """ self.ard = ard_instance def is_serology(self, allele: str) -> bool: - """Check if allele is valid serology""" + """Check if allele is valid serology + + Serological designations are simple alphanumeric codes without + the '*' or ':' characters used in molecular typing (e.g., 'A1', 'B27', 'DR4'). + + Args: + allele: String to check for serology format + + Returns: + True if the string is a valid serological designation, False otherwise + """ + # Serology codes don't contain molecular typing delimiters if "*" in allele or ":" in allele: return False + # Check against the set of valid serology codes in the database return allele in self.ard.valid_serology_set def get_alleles_from_serology(self, serology: str) -> Iterable[str]: - """Get alleles corresponding to serology""" + """Get alleles corresponding to serology + + Converts a serological designation to its corresponding molecular alleles. + Multiple alleles may correspond to a single serology due to the lower + resolution of serological typing methods. 
+ + Args: + serology: Serological designation (e.g., 'A1', 'B27') + + Returns: + Set of molecular alleles that correspond to the serology, + filtered to only include alleles present in the current database + """ + # Query database for alleles associated with this serology alleles = db.serology_to_alleles(self.ard.db_connection, serology) + # Filter to only include alleles that exist in current database return set(filter(self.ard._is_allele_in_db, alleles)) def find_broad_splits(self, allele: str) -> tuple: - """Find broad/splits for serology""" + """Find broad/splits for serology + + In serology, some antigens are 'broad' (general) while others are + 'splits' (more specific subdivisions). This method finds the + broad/split relationships for a given antigen. + + Args: + allele: Serological or molecular designation + + Returns: + Tuple containing broad and split antigen information + """ return self.ard.serology_mapping.find_splits(allele) def find_associated_antigen(self, serology: str) -> str: - """Find associated antigen for serology""" + """Find associated antigen for serology + + Some serological designations have associated or related antigens. + This method finds the primary antigen associated with a given serology. + + Args: + serology: Serological designation + + Returns: + Associated antigen designation + """ return self.ard.serology_mapping.find_associated_antigen(serology) def find_xx_from_serology(self, serology: str) -> str: - """Find XX code from serology""" + """Find XX code from serology + + XX codes represent groups of alleles that share serological reactivity. + This method finds the XX code that corresponds to a given serology. 
+ + Args: + serology: Serological designation to look up + + Returns: + XX code corresponding to the serology + + Raises: + InvalidAlleleError: If the serology is not valid + """ if self.is_serology(serology): return db.find_xx_for_serology(self.ard.db_connection, serology) from ..exceptions import InvalidAlleleError diff --git a/pyard/handlers/shortnull_handler.py b/pyard/handlers/shortnull_handler.py index fbba434..e32a9d0 100644 --- a/pyard/handlers/shortnull_handler.py +++ b/pyard/handlers/shortnull_handler.py @@ -7,15 +7,51 @@ class ShortNullHandler: - """Handles short null allele operations""" + """Handles short null allele operations + + Null alleles are HLA alleles that do not produce functional proteins + due to mutations. Short null alleles are abbreviated representations + of these null alleles. This class provides functionality to: + - Identify short null alleles + - Distinguish null alleles from other allele types + """ def __init__(self, ard_instance: "ARD"): + """Initialize the ShortNullHandler with an ARD instance + + Args: + ard_instance: The main ARD object containing configuration and data + """ self.ard = ard_instance def is_shortnull(self, allele: str) -> bool: - """Check if allele is a valid short null""" + """Check if allele is a valid short null + + Short null alleles are abbreviated forms of null alleles that are + recognized by the system. The check depends on both the allele being + in the short nulls database and the configuration allowing short null reduction. 
+ + Args: + allele: Allele string to check + + Returns: + True if the allele is a valid short null and short null reduction + is enabled in configuration, False otherwise + """ return allele in self.ard.shortnulls and self.ard._config["reduce_shortnull"] def is_null(self, allele: str) -> bool: - """Check if allele is a null allele""" + """Check if allele is a null allele + + Null alleles are identified by the 'N' suffix in HLA nomenclature, + indicating they do not produce functional proteins. This method + distinguishes true null alleles from MAC codes that might also end with 'N'. + + Args: + allele: Allele string to check + + Returns: + True if the allele is a null allele (ends with 'N' but is not a MAC code), + False otherwise + """ return allele.endswith("N") and not self.ard.is_mac(allele) diff --git a/pyard/handlers/v2_handler.py b/pyard/handlers/v2_handler.py index a3106b2..822f38e 100644 --- a/pyard/handlers/v2_handler.py +++ b/pyard/handlers/v2_handler.py @@ -10,23 +10,52 @@ class V2Handler: - """Handles V2 to V3 nomenclature conversion""" + """Handles V2 to V3 nomenclature conversion + + HLA nomenclature has evolved over time. V2 (version 2) nomenclature used + a different format than the current V3 (version 3) standard. This class + provides functionality to: + - Identify V2 format alleles + - Convert V2 alleles to V3 format + - Use heuristics when direct mappings are unavailable + """ def __init__(self, ard_instance: "ARD"): + """Initialize the V2Handler with an ARD instance + + Args: + ard_instance: The main ARD object for database access and configuration + """ self.ard = ard_instance def is_v2(self, allele: str) -> bool: - """Check if allele is V2 nomenclature""" + """Check if allele is V2 nomenclature + + V2 alleles are characterized by having a '*' but no ':' separator, + and exclude certain loci (MICA, MICB, HFE). The method validates + by attempting conversion to V3 format and checking database existence. 
+ + Args: + allele: Allele string to check (e.g., 'A*0101', 'B*2705') + + Returns: + True if the allele is valid V2 nomenclature and V2 reduction is enabled, + False otherwise + """ + # Check basic V2 format criteria matches_v2_format = ( - self.ard._config["reduce_v2"] - and "*" in allele - and ":" not in allele - and allele.split("*")[0] not in ["MICA", "MICB", "HFE"] + self.ard._config["reduce_v2"] # V2 reduction must be enabled + and "*" in allele # Must have locus separator + and ":" not in allele # Must not have field separators (V3 feature) + and allele.split("*")[0] + not in ["MICA", "MICB", "HFE"] # Exclude these loci ) if matches_v2_format: + # Attempt conversion to V3 format for validation v3_format_allele = self.map_v2_to_v3(allele) if v3_format_allele != allele: + # Check if converted allele is valid (MAC code or database allele) if v3_format_allele.split(":").pop().isalpha(): return self.ard.is_mac(v3_format_allele) return self.ard._is_allele_in_db(v3_format_allele) @@ -34,15 +63,40 @@ def is_v2(self, allele: str) -> bool: return False def map_v2_to_v3(self, v2_allele: str) -> str: - """Convert V2 allele to V3 format""" + """Convert V2 allele to V3 format + + Attempts to convert a V2 format allele to V3 format using database + mappings first, then falls back to heuristic prediction if no + direct mapping exists. + + Args: + v2_allele: V2 format allele (e.g., 'A*0101') + + Returns: + V3 format allele (e.g., 'A*01:01') or original if conversion fails + """ + # Try database lookup first v3_allele = db.v2_to_v3_allele(self.ard.db_connection, v2_allele) if not v3_allele: + # Fall back to heuristic prediction v3_allele = self._predict_v3(v2_allele) return v3_allele def _predict_v3(self, v2_allele: str) -> str: - """Use heuristic to predict V3 from V2""" + """Use heuristic to predict V3 from V2 + + Applies pattern-based rules to convert V2 format to V3 format when + no database mapping exists. 
Uses digit grouping and locus-specific + rules to insert colon separators appropriately. + + Args: + v2_allele: V2 format allele to convert + + Returns: + Predicted V3 format allele + """ locus, allele_name = v2_allele.split("*") + # Extract numeric and non-numeric parts components = re.findall(r"^(\d+)(.*)", allele_name) if not components: return v2_allele @@ -51,21 +105,27 @@ def _predict_v3(self, v2_allele: str) -> str: final_allele = digits_field num_of_digits = len(digits_field) + # Single digit alleles remain unchanged if num_of_digits == 1: return v2_allele + # Apply conversion rules based on digit count and locus if num_of_digits > 2: + # Special case for DP locus with 5 digits if locus.startswith("DP") and num_of_digits == 5: final_allele = ( digits_field[:3] + ":" + (digits_field[3:]) + non_digits_field ) + # Even number of digits: group in pairs elif num_of_digits % 2 == 0: final_allele = self._combine_with_colon(digits_field) + non_digits_field + # Odd number of digits: first 2, then remainder else: final_allele = ( digits_field[:2] + ":" + (digits_field[2:]) + non_digits_field ) else: + # 2 digits: add colon before non-digit suffix if present if non_digits_field: final_allele = digits_field + ":" + non_digits_field @@ -73,6 +133,16 @@ def _predict_v3(self, v2_allele: str) -> str: @staticmethod def _combine_with_colon(digits_field: str) -> str: - """Combine digits with colon separator""" + """Combine digits with colon separator + + Groups digits into pairs separated by colons for V3 format conversion. + Used when converting even-length digit sequences from V2 to V3. 
+ + Args: + digits_field: String of digits to group (e.g., '0101') + + Returns: + Colon-separated digit pairs (e.g., '01:01') + """ num_of_digits = len(digits_field) return ":".join(digits_field[i : i + 2] for i in range(0, num_of_digits, 2)) diff --git a/pyard/handlers/xx_handler.py b/pyard/handlers/xx_handler.py index 8cf4c61..eaf170e 100644 --- a/pyard/handlers/xx_handler.py +++ b/pyard/handlers/xx_handler.py @@ -7,17 +7,44 @@ class XXHandler: - """Handles XX code operations""" + """Handles XX code operations + + XX codes are special HLA nomenclature designations that represent + groups of alleles sharing common serological or functional properties. + The 'XX' suffix indicates a broad grouping at the antigen level. + This class provides functionality to identify and validate XX codes. + """ def __init__(self, ard_instance: "ARD"): + """Initialize the XXHandler with an ARD instance + + Args: + ard_instance: The main ARD object for accessing code mappings + """ self.ard = ard_instance def is_xx(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: - """Check if string is a valid XX code""" + """Check if string is a valid XX code + + XX codes have the format 'LOCUS*ANTIGEN:XX' where XX is the literal + string 'XX'. Validates that the code suffix is 'XX' and that the + locus/antigen combination exists in the XX code mappings. 
+ + Args: + glstring: String to check (e.g., 'A*01:XX', 'B*27:XX') + loc_antigen: Optional pre-parsed locus*antigen part + code: Optional pre-parsed code suffix + + Returns: + True if the string is a valid XX code, False otherwise + """ + # Parse the glstring if components not provided if loc_antigen is None or code is None: if ":" in glstring: loc_allele = glstring.split(":") loc_antigen, code = loc_allele[0], loc_allele[1] else: return False + + # Validate XX code: suffix must be 'XX' and locus*antigen must be in mappings return code == "XX" and loc_antigen in self.ard.code_mappings.xx_codes diff --git a/pyard/loader/g_group.py b/pyard/loader/g_group.py index 3e4036a..6c3485f 100644 --- a/pyard/loader/g_group.py +++ b/pyard/loader/g_group.py @@ -8,7 +8,35 @@ def load_g_group(imgt_version): - # load the hla_nom_g.txt + """ + load the hla_nom_g.txt + Sample file: + # file: hla_nom_g.txt + # date: 2025-10-08 + # version: IPD-IMGT/HLA 3.62.0 + # origin: http://hla.alleles.org/wmda/hla_nom_g.txt + # repository: https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/hla_nom_g.txt + # author: IPD Team (ipdsubs@anthonynolan.org) + A*;01:01:01:01/01:01:01:02N/01:01:01:03/ ... /01:481;01:01:01G + A*;01:01:02; + A*;01:01:03; + A*;01:01:04; + A*;01:01:05; + A*;01:01:06; + A*;01:01:07; + A*;01:01:08; + A*;01:01:09; + A*;01:01:10; + A*;01:01:11; + A*;01:01:12; + A*;01:01:13; + A*;01:01:14; + A*;01:01:15; + A*;01:01:16; + ... 
+ :param imgt_version: version of IPD/IMGT database + :return: Table of data from hla_nom_g with "Locus", "A", "G", "2d", "3d", "lgx" columns + """ ars_g_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_g.txt" try: response = urlopen(ars_g_url) diff --git a/pyard/loader/p_group.py b/pyard/loader/p_group.py index 9aeee8a..c144133 100644 --- a/pyard/loader/p_group.py +++ b/pyard/loader/p_group.py @@ -8,7 +8,29 @@ def load_p_group(imgt_version): + """ # load the hla_nom_p.txt + + # file: hla_nom_p.txt + # date: 2025-10-08 + # version: IPD-IMGT/HLA 3.62.0 + # origin: http://hla.alleles.org/wmda/hla_nom_p.txt + # repository: https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/hla_nom_p.txt + # author: IPD Team (ipdsubs@anthonynolan.org) + A*;01:01:01:01/01:01:01:03/01:01:01:04/01:01:01:05/01:01:01:06/01:01:01:07/01:01:01:08/01:01:01:09/01:01:01:10/01:01:01:11/01:01:01:12/01:01:01:13/01:01:01:14/01:01:01:15/01:01:01:16/01:01:01:17/01:01:01:18/01:01:01:19/01:01:01:20/01:01:01:21/01:01:01:22/01:01:01:23/01:01:01:24/01:01:01:25/01:01:01:26/01:01:01:27/01:01:01:28/01:01:01:29/01:01:01:30/01:01:01:31/01:01:01:32/01:01:01:33/01:01:01:34/01:01:01:35/01:01:01:36/01:01:01:37/01:01:01:38/01:01:01:39/01:01:01:40/01:01:01:41/01:01:01:42/01:01:01:43/01:01:01:44/01:01:01:45/01:01:01:46/01:01:01:47/01:01:01:48/01:01:01:49/01:01:01:50/01:01:01:51/01:01:01:52/01:01:01:53/01:01:01:54/01:01:01:55/01:01:01:56/01:01:01:57/01:01:01:58/01:01:01:59/01:01:01:60/01:01:01:61/01:01:01:62/01:01:01:63/01:01:01:64/01:01:01:65/01:01:01:66/01:01:01:67/01:01:01:68/01:01:01:69/01:01:01:70/01:01:01:71/01:01:01:72/01:01:01:73/01:01:01:74/01:01:01:75/01:01:01:76/01:01:01:77/01:01:01:78/01:01:01:79/01:01:01:80/01:01:01:81/01:01:01:82/01:01:01:83/01:01:01:84/01:01:01:85/01:01:01:86/01:01:01:87/01:01:01:88/01:01:01:89/01:01:01:90/01:01:01:91/01:01:01:92/01:01:01:93/01:01:01:94/01:01:01:95/01:01:01:96/01:01:01:97/01:01:01:98/01:01:01:99/01:01:01:100/01:01:01:101/01:01:01:102/01:01:01:103/01:01:01:104/01:
01:01:105/01:01:01:106/01:01:01:107/01:01:01:108/01:01:01:109/01:01:01:110/01:01:01:111/01:01:01:112/01:01:01:113/01:01:01:114/01:01:01:115/01:01:01:116/01:01:01:117/01:01:01:118/01:01:01:119/01:01:01:120/01:01:01:121/01:01:01:122/01:01:02/01:01:03/01:01:04/01:01:05/01:01:06/01:01:07/01:01:08/01:01:09/01:01:10/01:01:11/01:01:12/01:01:13/01:01:14/01:01:15/01:01:16/01:01:17/01:01:18/01:01:19/01:01:20/01:01:21/01:01:22/01:01:23/01:01:24/01:01:25/01:01:26/01:01:27/01:01:28/01:01:29/01:01:30/01:01:31/01:01:32/01:01:33/01:01:34/01:01:35/01:01:36/01:01:37/01:01:38L/01:01:39/01:01:40/01:01:41/01:01:42/01:01:43/01:01:44/01:01:45/01:01:46/01:01:47/01:01:48/01:01:49/01:01:50/01:01:51/01:01:52/01:01:53/01:01:54/01:01:55/01:01:56/01:01:57/01:01:58/01:01:59/01:01:60/01:01:61/01:01:62/01:01:63/01:01:64/01:01:65/01:01:66/01:01:67/01:01:68/01:01:69/01:01:70/01:01:71/01:01:72/01:01:73/01:01:74/01:01:75/01:01:76/01:01:77/01:01:78/01:01:79/01:01:80/01:01:81/01:01:82/01:01:83/01:01:84/01:01:85/01:01:86/01:01:87/01:01:88/01:01:89/01:01:90/01:01:91/01:01:92/01:01:93/01:01:94/01:01:95/01:01:96/01:01:97/01:01:98/01:01:99/01:01:100/01:01:101/01:01:102/01:01:103/01:01:104/01:01:105/01:01:106/01:01:107/01:01:108/01:01:109/01:01:110/01:01:111/01:01:112/01:01:113/01:01:114/01:01:115/01:01:116/01:01:117/01:01:118/01:01:119/01:01:120/01:01:121/01:01:122/01:01:123/01:01:124/01:01:125/01:01:126/01:01:127/01:01:128/01:01:129/01:01:130/01:01:131/01:01:132/01:01:133/01:01:134/01:01:135/01:01:136/01:01:137/01:01:138/01:01:139/01:01:140/01:01:141/01:01:142/01:01:143/01:01:144/01:01:145/01:01:146/01:01:147/01:01:148/01:01:149/01:01:150/01:01:151/01:01:152/01:01:153/01:01:154/01:01:155/01:01:156/01:01:157/01:01:158/01:01:159/01:01:160/01:01:161/01:01:162/01:01:163/01:01:164/01:01:165/01:01:166/01:01:167/01:32/01:37:01:01/01:37:01:02/01:45/01:81/01:103/01:107/01:109/01:132/01:141/01:142/01:155/01:177/01:212/01:217/01:234/01:237/01:246/01:248Q/01:249/01:251/01:252/01:253/01:261/01:274/01:276/01:277/01:280/01
:281Q/01:288/01:291/01:295/01:296/01:297/01:300/01:305/01:306/01:309/01:316/01:317/01:319/01:323/01:324:01/01:324:02/01:325/01:331/01:332/01:335/01:338/01:346/01:347/01:349/01:351/01:353/01:356/01:357/01:358/01:367/01:368/01:369/01:370/01:371/01:372/01:377/01:378/01:383/01:385/01:386/01:387/01:388/01:389/01:390/01:392/01:404/01:406/01:407/01:409/01:410/01:415/01:419/01:422/01:426/01:436Q/01:440/01:441/01:443/01:444/01:445/01:446/01:456/01:460/01:461/01:467/01:468/01:471/01:472/01:473Q/01:476Q/01:481;01:01P + A*;01:02:01:01/01:02:01:02/01:02:01:03/01:02:02/01:412;01:02P + A*;01:03:01:01/01:03:01:02/01:03:02/01:03:03/01:315;01:03P + A*;01:06; + A*;01:07; + A*;01:08; + A*;01:09:01:01/01:09:01:02/01:09:02;01:09P + A*;01:10; + A*;01:12; + A*;01:13; + + :param imgt_version: version of IPD/IMGT database + :return: + """ ars_p_url = f"{IMGT_HLA_URL}{imgt_version}/wmda/hla_nom_p.txt" try: response = urlopen(ars_p_url) diff --git a/pyard/loader/serology.py b/pyard/loader/serology.py index f606e92..1c89d3c 100644 --- a/pyard/loader/serology.py +++ b/pyard/loader/serology.py @@ -13,6 +13,24 @@ def load_serology_mappings(imgt_version): """ Read `rel_dna_ser.txt` file that contains alleles and their serological equivalents. + # file: rel_dna_ser.txt + # date: 2025-10-08 + # version: IPD-IMGT/HLA 3.62.0 + # origin: http://hla.alleles.org/wmda/rel_dna_ser.txt + # repository: https://raw.githubusercontent.com/ANHIG/IMGTHLA/Latest/wmda/rel_dna_ser.txt + # author: WHO, Steven G. E. 
Marsh (steven.marsh@ucl.ac.uk) + A*;01:01:01:01;1;;; + A*;01:01:01:02N;0;;; + A*;01:01:01:03;1;;; + A*;01:01:01:04;1;;; + A*;01:01:01:05;1;;; + A*;01:01:01:06;1;;; + A*;01:01:01:07;1;;; + A*;01:01:01:08;1;;; + A*;01:01:01:09;1;;; + A*;01:01:01:10;1;;; + A*;01:01:01:11;1;;; + The fields of the Alleles->Serological mapping file are: Locus - HLA Locus Allele - HLA Allele Name diff --git a/pyard/reducers/base_reducer.py b/pyard/reducers/base_reducer.py index 38ee66d..7f32042 100644 --- a/pyard/reducers/base_reducer.py +++ b/pyard/reducers/base_reducer.py @@ -1,4 +1,11 @@ # -*- coding: utf-8 -*- +""" +Abstract Base Class for HLA Allele Reduction Strategies. + +This module defines the Reducer abstract base class that serves as the foundation +for all HLA allele reduction strategies in py-ard. It implements the Strategy +design pattern to allow interchangeable reduction algorithms. +""" from abc import ABC, abstractmethod @@ -9,12 +16,99 @@ class Reducer(ABC): - """Base class for all reduction strategies""" + """ + Abstract base class for all HLA allele reduction strategies. + + This class implements the Strategy design pattern, providing a common + interface for all reduction strategies while allowing each strategy to + implement its own specific reduction logic. All concrete reduction + strategies must inherit from this class and implement the reduce method. + + The Reducer class serves as the foundation for various HLA reduction types: + - G Group reduction (GGroupReducer) + - P Group reduction (PGroupReducer) + - LG/LGX reduction (LGReducer, LGXReducer) + - WHO reduction (WReducer) + - Serology reduction (SReducer) + - U2 reduction (U2Reducer) + - Exon reduction (ExonReducer) + - Default validation (DefaultReducer) + + Design Pattern: + This class implements the Strategy pattern, allowing the ARD system + to switch between different reduction algorithms at runtime based + on the requested reduction type. 
+ + Attributes: + ard (ARD): The ARD instance containing all mapping data, database + connections, and utility methods needed for reduction. + + Example: + >>> class CustomReducer(Reducer): + ... def reduce(self, allele: str) -> str: + ... # Custom reduction logic here + ... return processed_allele + ... + >>> reducer = CustomReducer(ard_instance) + >>> result = reducer.reduce("A*01:01:01:01") + """ def __init__(self, ard_instance: "ARD"): + """ + Initialize the reducer with an ARD instance. + + Args: + ard_instance (ARD): The ARD instance containing mapping data, + database connections, and utility methods + required for allele reduction operations. + + Note: + The ARD instance provides access to: + - ARS mappings (G groups, P groups, LGX groups, etc.) + - Code mappings (WHO groups, MAC codes, etc.) + - Database connections for serology and other lookups + - Utility methods for validation and processing + """ self.ard = ard_instance @abstractmethod def reduce(self, allele: str) -> str: - """Reduce allele according to this strategy""" + """ + Reduce an HLA allele according to this strategy's specific logic. + + This is the core method that must be implemented by all concrete + reduction strategies. Each implementation should define how to + transform the input allele according to its specific reduction rules. 
+ + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele according to this strategy's rules + (e.g., "A*01:01:01G" for G group reduction) + + Raises: + InvalidAlleleError: May be raised by concrete implementations + for invalid input alleles. The abstract method itself is a + no-op; ABC prevents instantiating Reducer without an override. + + Examples: + This method's behavior depends on the concrete implementation: + + >>> g_reducer = GGroupReducer(ard) + >>> g_reducer.reduce("A*01:01:01:01") + "A*01:01:01G" + + >>> lg_reducer = LGReducer(ard) + >>> lg_reducer.reduce("A*01:01:01:01") + "A*01:01g" + + >>> s_reducer = SReducer(ard) + >>> s_reducer.reduce("A*01:01:01:01") + "A1" + + Note: + Concrete implementations should handle edge cases appropriately + and may use the ARD instance's methods and data for processing. + """ pass diff --git a/pyard/reducers/default_reducer.py b/pyard/reducers/default_reducer.py index cf848a6..c3fb933 100644 --- a/pyard/reducers/default_reducer.py +++ b/pyard/reducers/default_reducer.py @@ -1,23 +1,101 @@ # -*- coding: utf-8 -*- +""" +Default HLA Allele Reduction Strategy with Validation. + +This module implements the default reduction strategy that serves as a fallback +for other reducers and provides stringent validation of HLA alleles, particularly +those with P and G group suffixes. +""" + from .base_reducer import Reducer from ..exceptions import InvalidAlleleError class DefaultReducer(Reducer): - """Default strategy for handling P/G suffixes and validation""" + """ + Default strategy for HLA allele validation and P/G suffix handling. + + The DefaultReducer serves as the base validation strategy and fallback + for other reduction strategies. It performs stringent validation of + HLA alleles, with special handling for P group and G group suffixes. + + This reducer is particularly important for: + 1. Validating P and G group designations against official mappings + 2.
Serving as a fallback when other reduction strategies cannot process an allele + 3. Providing comprehensive allele validation before returning results + 4. Raising appropriate exceptions for invalid alleles + + The validation process: + 1. Check P group suffixes against official P group mappings + 2. Check G group suffixes against official G group mappings + 3. Validate general allele format and existence + 4. Raise InvalidAlleleError for invalid alleles + + Examples: + - A*01:01P -> A*01:01P (if valid P group) + - A*01:01:01G -> A*01:01:01G (if valid G group) + - A*01:01:01:01 -> A*01:01:01:01 (if valid allele) + - A*99:99 -> InvalidAlleleError (if invalid) + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: - # Make this an explicit lookup to the g_group or p_group table - # for stringent validation + """ + Validate and return HLA allele with stringent P/G suffix checking. + + This method performs comprehensive validation of HLA alleles, with + special attention to P and G group suffixes. It ensures that any + allele with these suffixes actually exists in the official mapping + tables before accepting them as valid. + + Args: + allele (str): The HLA allele to validate (e.g., "A*01:01P", "A*01:01:01G") + + Returns: + str: The validated allele unchanged if valid + + Raises: + InvalidAlleleError: If the allele is invalid or suffix is not in official mappings + + Examples: + >>> reducer = DefaultReducer(ard) + >>> reducer.reduce("A*01:01P") + "A*01:01P" # if valid P group + >>> reducer.reduce("A*01:01:01G") + "A*01:01:01G" # if valid G group + >>> reducer.reduce("A*01:01:01:01") + "A*01:01:01:01" # if valid allele + >>> reducer.reduce("A*99:99") + InvalidAlleleError: A*99:99 is an invalid allele. + + Process: + 1. Check P group suffix validation against official mappings + 2. Check G group suffix validation against official mappings + 3. 
Perform general allele validation + 4. Raise exception for invalid alleles + """ + # Step 1: Stringent validation for P group suffixes if allele.endswith("P"): + # Verify that this P group designation exists in official P group mappings + # This prevents acceptance of arbitrary P suffixes if allele in self.ard.ars_mappings.p_group.values(): return allele + # If P suffix but not in official mappings, fall through to general validation + + # Step 2: Stringent validation for G group suffixes elif allele.endswith("G"): + # Verify that this G group designation exists in official G group mappings + # This prevents acceptance of arbitrary G suffixes if allele in self.ard.ars_mappings.g_group.values(): return allele + # If G suffix but not in official mappings, fall through to general validation + # Step 3: General allele validation if self.ard.is_valid_allele(allele): return allele else: + # Step 4: Raise exception for invalid alleles + # This ensures that invalid alleles are caught and reported raise InvalidAlleleError(f"{allele} is an invalid allele.") diff --git a/pyard/reducers/exon_reducer.py b/pyard/reducers/exon_reducer.py index 8cb1c05..d49949e 100644 --- a/pyard/reducers/exon_reducer.py +++ b/pyard/reducers/exon_reducer.py @@ -1,30 +1,103 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for Exon-Level (3-Field) Reduction. + +This module implements the exon reduction strategy for HLA alleles. +Exon reduction reduces alleles to the 3-field level, which typically +represents the exon-level resolution excluding intron variations. +""" + from .base_reducer import Reducer from ..constants import expression_chars class ExonReducer(Reducer): - """Strategy for exon reduction""" + """ + Strategy for exon (3-field) reduction of HLA alleles. + + Exon reduction reduces HLA alleles to their 3-field representation, + which captures exon-level sequence differences while dropping variations + in non-coding regions (introns).
This level of resolution is useful + when intron variations are not relevant for analysis. + + The reduction process: + 1. Check for pre-computed exon group mappings + 2. Handle expression character preservation (N, L, S, etc.) + 3. For unmapped alleles, expand to WHO level first, then reduce + 4. Avoid infinite recursion with appropriate termination conditions + + Examples: + - A*01:01:01:01 -> A*01:01:01 + - A*01:01:01:02N -> A*01:01:01N (preserves expression character) + - B*07:02:01:01 -> B*07:02:01 + - DRB1*15:01:01:01 -> DRB1*15:01:01 + + Note: + Expression characters (N, L, S, etc.) are preserved when they + represent consistent expression patterns across all 4-field variants. + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele to its exon (3-field) representation. + + This method performs exon reduction by first checking for pre-computed + exon group mappings, handling expression character preservation, and + using WHO expansion as a fallback strategy for unmapped alleles. + + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in exon (3-field) format (e.g., "A*01:01:01") + + Examples: + >>> reducer = ExonReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01:01" + >>> reducer.reduce("A*01:01:01:02N") + "A*01:01:01N" # expression character preserved + >>> reducer.reduce("B*07:02:01:01") + "B*07:02:01" + + Process: + 1. Check for pre-computed exon group mapping + 2. Handle expression character preservation if applicable + 3. For unmapped alleles, expand to WHO level first + 4.
Recursively apply exon reduction to WHO-expanded form + """ + # Step 1: Check for pre-computed exon group mapping if allele in self.ard.ars_mappings.exon_group: + # Get the base 3-field exon group allele exon_group_allele = self.ard.ars_mappings.exon_group[allele] - # Check if the 3 field exon allele has a 4 field alleles - # that all have the same expression characters + # Step 2: Handle expression character preservation + # Check if original allele has an expression character (N, L, S, etc.) last_char = allele[-1] if last_char in expression_chars: + # Create 3-field allele with preserved expression character exon_short_null_allele = exon_group_allele + last_char + + # Verify that this expression variant is valid (shortnull check) if self.ard.is_shortnull(exon_short_null_allele): return exon_short_null_allele + + # Return base exon group allele (no expression character needed) return exon_group_allele else: - # Expand to W level and then reduce to exon + # Step 3: Fallback strategy for unmapped alleles + # First expand to WHO level to get complete nomenclature w_redux = self.ard.redux(allele, "W") - # If the W redux produces 2 field allele or the same allele, don't recurse + + # Step 4: Avoid infinite recursion + # If WHO reduction doesn't change the allele or results in 2-field, + # return original to prevent recursion if w_redux == allele or len(w_redux.split(":")) == 2: return allele else: - # recurse with the W fields + # Recursively apply exon reduction to the WHO-expanded form + # This handles cases where WHO expansion provides mappable alleles return self.ard.redux(w_redux, "exon") diff --git a/pyard/reducers/g_reducer.py b/pyard/reducers/g_reducer.py index d6b2b1f..774c648 100644 --- a/pyard/reducers/g_reducer.py +++ b/pyard/reducers/g_reducer.py @@ -1,16 +1,73 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for G Groups. + +This module implements the G group reduction strategy for HLA alleles. 
+G groups represent alleles that have identical nucleotide sequences across +the exons encoding the antigen recognition domain (ARD). +""" + from .default_reducer import DefaultReducer class GGroupReducer(DefaultReducer): - """Strategy for G group reduction""" + """ + Strategy for G group reduction of HLA alleles. + + G groups are collections of HLA alleles that have identical nucleotide + sequences in the exons encoding the antigen recognition domain (ARD). + This reduction is important for HLA matching as alleles within the same + G group are functionally equivalent for transplantation purposes. + + The G group reduction follows this priority: + 1. Check for duplicate G group mappings (dup_g) - handles special cases + 2. Use standard G group mappings (g_group) + 3. Fall back to default reduction if no G group mapping exists + + Examples: + - A*01:01:01:01 -> A*01:01:01G + - A*01:01:01:02N -> A*01:01:01G (same G group as above) + - B*07:02:01 -> B*07:02:01G + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele to its G group representation. + + This method performs G group reduction by checking multiple mapping tables + in order of priority. It first checks for duplicate G group mappings, + then standard G group mappings, and finally falls back to default reduction. + + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in G group format (e.g., "A*01:01:01G") + + Examples: + >>> reducer = GGroupReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01:01G" + >>> reducer.reduce("A*01:01:01:02N") + "A*01:01:01G" + >>> reducer.reduce("B*07:02:01") + "B*07:02:01G" + + Note: + If the allele is not found in any G group mapping, it falls back + to the default reduction strategy from the parent class. 
+ """ + # Check if allele has a G group mapping if allele in self.ard.ars_mappings.g_group: + # Priority 1: Check for duplicate G group mappings (special cases) if allele in self.ard.ars_mappings.dup_g: return self.ard.ars_mappings.dup_g[allele] else: + # Priority 2: Use standard G group mapping return self.ard.ars_mappings.g_group[allele] + # Priority 3: Fall back to default reduction if no G group mapping exists return super().reduce(allele) diff --git a/pyard/reducers/lg_reducer.py b/pyard/reducers/lg_reducer.py index d01580c..b1aa24f 100644 --- a/pyard/reducers/lg_reducer.py +++ b/pyard/reducers/lg_reducer.py @@ -1,34 +1,134 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategies for LG and LGX levels. + +This module implements reduction strategies for HLA alleles to the LG (2-field + 'g' suffix) +and LGX (2-field only) levels, which are commonly used in HLA typing and matching. +""" + from .base_reducer import Reducer class LGXReducer(Reducer): - """Strategy for lgx reduction""" + """ + Strategy for LGX (2-field) reduction of HLA alleles. + + The LGX reduction reduces HLA alleles to their 2-field representation, + which corresponds to the Antigen Recognition Domain (ARD) level. + This is the most commonly used reduction level for HLA matching. + + Examples: + - A*01:01:01:01 -> A*01:01 + - B*07:02:01 -> B*07:02 + - DRB1*15:01:01:01 -> DRB1*15:01 + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele to its LGX (2-field) representation. + + This method first checks if the allele exists in the pre-computed LGX group + mappings. If found, it returns the mapped value. Otherwise, it performs + a simple field-based reduction by taking only the first two fields. 
+ + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in LGX format (e.g., "A*01:01") + + Examples: + >>> reducer = LGXReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01" + >>> reducer.reduce("B*07:02:01") + "B*07:02" + """ + # Check if allele has a pre-computed LGX mapping if allele in self.ard.ars_mappings.lgx_group: return self.ard.ars_mappings.lgx_group[allele] else: - # Return allele with only first 2 fields + # Fallback: manually extract first 2 fields (locus + first two numeric fields) return ":".join(allele.split(":")[0:2]) class LGReducer(Reducer): - """Strategy for lg reduction (lgx + g suffix)""" + """ + Strategy for LG reduction of HLA alleles (LGX + 'g' suffix). + + The LG reduction is similar to LGX but adds a 'g' suffix to indicate + that the allele has been reduced to the 2-field level. This suffix + helps distinguish between original 2-field typings and reduced typings. + + Examples: + - A*01:01:01:01 -> A*01:01g + - B*07:02:01 -> B*07:02g + - DRB1*15:01:01:01 -> DRB1*15:01g + + Note: + If the ARD configuration has 'ARS_as_lg' set to True, the suffix + 'ARS' is used instead of 'g'. + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele to its LG representation (LGX + suffix). + + This method first performs LGX reduction and then adds the appropriate + suffix ('g' or 'ARS' depending on configuration). 
+ + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in LG format (e.g., "A*01:01g") + + Examples: + >>> reducer = LGReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01g" + >>> reducer.reduce("B*07:02:01") + "B*07:02g" + """ + # First perform LGX reduction lgx_strategy = LGXReducer(self.ard) redux_allele = lgx_strategy.reduce(allele) + + # Add appropriate suffix return self._add_lg_suffix(redux_allele) def _add_lg_suffix(self, redux_allele: str) -> str: - """Add lg suffix to reduced allele""" + """ + Add the LG suffix ('g' or 'ARS') to a reduced allele. + + This method handles both single alleles and allele lists (separated by '/'). + The suffix used depends on the ARD configuration setting 'ARS_as_lg'. + + Args: + redux_allele (str): The reduced allele or allele list to add suffix to + + Returns: + str: The allele(s) with appropriate LG suffix added + + Examples: + >>> reducer._add_lg_suffix("A*01:01") + "A*01:01g" + >>> reducer._add_lg_suffix("A*01:01/A*01:02") + "A*01:01g/A*01:02g" + """ + # Handle allele lists (multiple alleles separated by '/') if "/" in redux_allele: return "/".join( [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] ) + + # Add suffix based on configuration if self.ard._config["ARS_as_lg"]: return redux_allele + "ARS" return redux_allele + "g" diff --git a/pyard/reducers/p_reducer.py b/pyard/reducers/p_reducer.py index 968f425..3b27205 100644 --- a/pyard/reducers/p_reducer.py +++ b/pyard/reducers/p_reducer.py @@ -1,14 +1,74 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for P Groups. + +This module implements the P group reduction strategy for HLA alleles. +P groups represent alleles that have identical protein sequences when +expressed, making them functionally equivalent at the protein level. 
+""" from .default_reducer import DefaultReducer class PGroupReducer(DefaultReducer): - """Strategy for P group reduction""" + """ + Strategy for P group reduction of HLA alleles. + + P groups are collections of HLA alleles that encode identical protein + sequences when expressed. This reduction is particularly useful when + the focus is on the functional protein product rather than the specific + nucleotide sequence differences that don't affect the final protein. + + P group reduction is broader than G group reduction, as it groups together + alleles that may have different nucleotide sequences but produce the same + protein. This makes P groups useful for functional analysis and certain + types of HLA matching where protein-level equivalence is sufficient. + + Examples: + - A*01:01:01:01 -> A*01:01P + - A*01:01:01:03 -> A*01:01P (same protein as above) + - B*07:02:01 -> B*07:02P + + Note: + In py-ard's "ping" mode (default), when an allele doesn't have a G group, + its corresponding P group is used instead, making P groups particularly + important for comprehensive allele reduction. + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele to its P group representation. + + This method performs P group reduction by checking if the allele exists + in the P group mapping table. If found, it returns the corresponding + P group designation. If not found, it falls back to the default + reduction strategy from the parent class.
+ + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in P group format (e.g., "A*01:01P") + + Examples: + >>> reducer = PGroupReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01P" + >>> reducer.reduce("A*01:01:01:03") + "A*01:01P" + >>> reducer.reduce("B*07:02:01") + "B*07:02P" + + Note: + If the allele is not found in the P group mapping, it falls back + to the default reduction strategy from the parent class. + """ + # Check if allele has a P group mapping if allele in self.ard.ars_mappings.p_group: return self.ard.ars_mappings.p_group[allele] + # Fall back to default reduction if no P group mapping exists return super().reduce(allele) diff --git a/pyard/reducers/s_reducer.py b/pyard/reducers/s_reducer.py index 258470e..1764919 100644 --- a/pyard/reducers/s_reducer.py +++ b/pyard/reducers/s_reducer.py @@ -1,4 +1,11 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for Serology Groups. + +This module implements the serology reduction strategy for HLA alleles. +Serology reduction converts molecular HLA alleles back to their corresponding +serological equivalents, which were historically used before DNA-based typing. +""" import functools @@ -8,32 +15,96 @@ class SReducer(Reducer): - """Strategy for serology reduction""" + """ + Strategy for serology reduction of HLA alleles. + + Serology reduction converts molecular HLA alleles to their corresponding + serological equivalents. This is important for compatibility with legacy + systems and historical HLA typing data that used serological methods + before DNA-based typing became standard. + + The reduction process: + 1. Determines if the allele is 2-field and reduces to LGX if needed + 2. Queries the serology mapping database to find corresponding serologies + 3. Handles both direct matches and LGX-reduced matches + 4.
Returns sorted serology designations + + Examples: + - A*01:01:01:01 -> A1 + - A*02:01:01:01 -> A2 + - B*07:02:01 -> B7 + - DRB1*15:01:01:01 -> DR15 + + Note: + Some alleles may map to multiple serological equivalents, + in which case they are returned as a slash-separated list. + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: - # find serology equivalent in serology_mapping + """ + Reduce an HLA allele to its serological equivalent(s). + + This method performs serology reduction by querying the serology mapping + database to find which serological designations correspond to the given + molecular allele. It handles both 2-field and multi-field alleles with + appropriate fallback strategies. + + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The serological equivalent(s) (e.g., "A1" or "A1/A36" for multiple matches) + + Examples: + >>> reducer = SReducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A1" + >>> reducer.reduce("A*02:01:01:01") + "A2" + >>> reducer.reduce("B*07:02:01") + "B7" + + Process: + 1. Check if allele is 2-field and reduce to LGX if necessary + 2. Query serology mapping database + 3. Find matching serologies by checking allele lists + 4. If no matches and allele is 2-field, try LGX-reduced matching + 5. 
Return sorted serology designations + """ + # Step 1: Handle 2-field alleles by reducing to LGX first if is_2_field_allele(allele): allele = self.ard._redux_allele(allele, "lgx") + # Query serology mapping using LGX-specific allele lists serology_mapping = db.find_serology_for_allele( self.ard.db_connection, allele, "lgx_allele_list" ) else: + # Query serology mapping for multi-field alleles serology_mapping = db.find_serology_for_allele( self.ard.db_connection, allele ) + # Step 2: Find serologies that contain this allele in their allele lists serology_set = set() for serology, allele_list in serology_mapping.items(): + # Check if our allele is in the slash-separated allele list if allele in allele_list.split("/"): serology_set.add(serology) + # Step 3: Fallback strategy for 2-field alleles with no direct matches if not serology_set and is_2_field_allele(allele): + # Try matching against LGX-reduced versions of the allele lists for serology, allele_list in serology_mapping.items(): + # Reduce the entire allele list to LGX and check for matches allele_list_lgx = self.ard.redux(allele_list, "lgx") if allele in allele_list_lgx.split("/"): serology_set.add(serology) + # Step 4: Return sorted serology designations + # Use smart sort comparator to ensure proper HLA ordering return "/".join( sorted( serology_set, key=functools.cmp_to_key(self.ard.smart_sort_comparator) diff --git a/pyard/reducers/u2_reducer.py b/pyard/reducers/u2_reducer.py index 141c5ab..463ece3 100644 --- a/pyard/reducers/u2_reducer.py +++ b/pyard/reducers/u2_reducer.py @@ -1,23 +1,89 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for Unambiguous 2-Field Reduction. + +This module implements the U2 (Unambiguous 2-field) reduction strategy for HLA alleles. +U2 reduction attempts to reduce alleles to 2-field level only when it results in +an unambiguous representation, otherwise falls back to LGX reduction. 
+""" from .base_reducer import Reducer from ..misc import get_n_field_allele class U2Reducer(Reducer): - """Strategy for U2 reduction""" + """ + Strategy for U2 (Unambiguous 2-field) reduction of HLA alleles. + + U2 reduction is a conservative approach that only reduces alleles to + 2-field level when the reduction is unambiguous. This ensures that + the reduced form maintains the same specificity as the original allele + without introducing ambiguity. + + The reduction logic: + 1. If allele is already 2-field, return as-is + 2. If 2-field reduction exists unambiguously in database, use it + 3. If 2-field reduction would be ambiguous, fall back to LGX reduction + + This approach is particularly useful when you need to reduce resolution + but want to avoid creating ambiguous typings that could represent + multiple distinct alleles. + + Examples: + - A*01:01:01:01 -> A*01:01 (if A*01:01 is unambiguous) + - A*01:01:01:01 -> A*01:01 (if A*01:01 would be ambiguous, falls back to LGX) + - B*07:02 -> B*07:02 (already 2-field, returned as-is) + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce an HLA allele using U2 (Unambiguous 2-field) strategy. + + This method performs U2 reduction by checking if a 2-field reduction + would be unambiguous. If the 2-field form exists as a valid allele + in the database, it's considered unambiguous and returned. Otherwise, + the method falls back to LGX reduction to avoid ambiguity. + + Args: + allele (str): The HLA allele to reduce (e.g., "A*01:01:01:01") + + Returns: + str: The reduced allele in U2 format (e.g., "A*01:01" or LGX fallback) + + Examples: + >>> reducer = U2Reducer(ard) + >>> reducer.reduce("A*01:01:01:01") + "A*01:01" # if unambiguous + >>> reducer.reduce("B*07:02:01") + "B*07:02" # if unambiguous + >>> reducer.reduce("C*01:02:01") + "C*01:02" # falls back to LGX if 2-field would be ambiguous + + Process: + 1. 
Check if allele is already 2-field (return as-is) + 2. Extract 2-field version while preserving expression suffixes + 3. Verify if 2-field version exists unambiguously in database + 4. Return 2-field if unambiguous, otherwise fall back to LGX + """ + # Step 1: Parse allele fields allele_fields = allele.split(":") - # If resolved out to second field leave alone + + # Step 2: If already at 2-field level, no reduction needed if len(allele_fields) == 2: return allele - # If the 2 field reduction is unambiguous, reduce to 2 field level + # Step 3: Attempt 2-field reduction with expression preservation + # Extract first 2 fields while preserving any expression suffixes (N, L, S, etc.) allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) + + # Step 4: Check if 2-field reduction is unambiguous if self.ard._is_allele_in_db(allele_2_fields): + # 2-field form exists in database - it's unambiguous return allele_2_fields else: - # If ambiguous, reduce to G group level + # 2-field form would be ambiguous - fall back to LGX reduction + # This ensures we don't create ambiguous typings return self.ard._redux_allele(allele, "lgx") diff --git a/pyard/reducers/w_reducer.py b/pyard/reducers/w_reducer.py index de45ef8..c84e581 100644 --- a/pyard/reducers/w_reducer.py +++ b/pyard/reducers/w_reducer.py @@ -1,18 +1,90 @@ # -*- coding: utf-8 -*- +""" +HLA Allele Reduction Strategy for WHO Nomenclature. + +This module implements the W (WHO) reduction strategy for HLA alleles. +WHO reduction expands or reduces alleles to conform to the official +WHO nomenclature standards for HLA typing. +""" from .base_reducer import Reducer class WReducer(Reducer): - """Strategy for W (WHO) reduction""" + """ + Strategy for W (WHO) reduction of HLA alleles. + + WHO reduction ensures that HLA alleles conform to the official World Health + Organization (WHO) nomenclature standards. 
This reduction can both expand + and reduce alleles to their proper WHO-compliant representation, which + typically means full-field nomenclature (4, 3, or 2 fields as appropriate). + + The WHO nomenclature system is the official standard defined by the + WHO Nomenclature Committee for Factors of the HLA System and ensures + consistent representation of HLA alleles across different systems and + databases. + + The reduction logic: + 1. If allele is already WHO-compliant, return as-is + 2. If allele has WHO group mapping, expand to WHO-compliant form + 3. If no mapping exists, return original allele unchanged + + Examples: + - A*01:01 -> A*01:01:01:01 (expansion to full WHO nomenclature) + - B*07:02 -> B*07:02:01 (expansion to appropriate WHO level) + - DRB1*15:01 -> DRB1*15:01:01:01 (full WHO expansion) + + Note: + WHO reduction may result in expansion rather than reduction, + as it aims for the most complete and standardized representation. + + Attributes: + ard: The ARD (Antigen Recognition Domain) object containing mapping data + """ - # @override def reduce(self, allele: str) -> str: + """ + Reduce/expand an HLA allele to WHO nomenclature standard. + + This method performs WHO reduction by checking if the allele already + conforms to WHO standards, and if not, attempts to expand it using + WHO group mappings. The process may result in expansion rather than + reduction to achieve WHO compliance. + + Args: + allele (str): The HLA allele to process (e.g., "A*01:01") + + Returns: + str: The WHO-compliant allele representation (e.g., "A*01:01:01:01") + + Examples: + >>> reducer = WReducer(ard) + >>> reducer.reduce("A*01:01") + "A*01:01:01:01" # expanded to full WHO nomenclature + >>> reducer.reduce("B*07:02") + "B*07:02:01" # expanded to appropriate WHO level + >>> reducer.reduce("A*01:01:01:01") + "A*01:01:01:01" # already WHO-compliant, returned as-is + + Process: + 1. Check if allele is already WHO-compliant + 2. If not, look up WHO group mapping + 3. 
Recursively apply WHO reduction to mapped alleles + 4. Return original if no mapping exists + """ + # Step 1: Check if allele already conforms to WHO nomenclature if self.ard._is_who_allele(allele): return allele + + # Step 2: Look up WHO group mapping for expansion if allele in self.ard.code_mappings.who_group: - return self.ard.redux( - "/".join(self.ard.code_mappings.who_group[allele]), "W" - ) + # Get the list of WHO-compliant alleles for this input + who_alleles = self.ard.code_mappings.who_group[allele] + + # Recursively apply WHO reduction to the mapped alleles + # This handles cases where the mapping itself needs further WHO processing + return self.ard.redux("/".join(who_alleles), "W") else: + # Step 3: No WHO mapping found - return original allele + # This preserves alleles that don't have WHO group mappings return allele From e5ba343aec7a274a893af23d53e26eae4c2e7732 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 3 Nov 2025 13:57:51 -0600 Subject: [PATCH 20/24] Document pyard.* modules --- pyard/blender.py | 109 +++++++++++++++++++++-------- pyard/misc.py | 166 +++++++++++++++++++++++++++++++++++++++++++- pyard/serology.py | 148 ++++++++++++++++++++++++++++++++------- pyard/smart_sort.py | 94 +++++++++++++++---------- 4 files changed, 425 insertions(+), 92 deletions(-) diff --git a/pyard/blender.py b/pyard/blender.py index 0612102..0ff10bd 100644 --- a/pyard/blender.py +++ b/pyard/blender.py @@ -24,18 +24,41 @@ def blender(drb1, drb3="", drb4="", drb5=""): + """Blend DRB1 typing with DRB3/4/5 to determine expected DRBX expression + + The DRB locus region contains multiple genes (DRB1, DRB3, DRB4, DRB5) but + only certain combinations are expressed based on DRB1 allele families. + This function validates that the provided DRBX typing matches the expected + pattern based on DRB1 and returns the appropriate DRBX expression. 
+ + Args: + drb1: DRB1 typing (e.g., 'DRB1*03:01+DRB1*04:01') + drb3: DRB3 typing if present + drb4: DRB4 typing if present + drb5: DRB5 typing if present + + Returns: + Expected DRBX expression based on DRB1 families, or empty string + + Raises: + DRBXBlenderError: If provided DRBX doesn't match expected pattern + """ + # Parse DRB1 typing to extract allele families try: drb1_1, drb1_2 = drb1.split("+") drb1_allele_1 = drb1_1.split("*")[1] drb1_allele_2 = drb1_2.split("*")[1] - drb1_fam_1 = drb1_allele_1.split(":")[0] - drb1_fam_2 = drb1_allele_2.split(":")[0] + drb1_fam_1 = drb1_allele_1.split(":")[0] # First field (family) + drb1_fam_2 = drb1_allele_2.split(":")[0] # First field (family) except Exception: return "" + # Map DRB1 families to expected DRBX genes (3, 4, 5, or 0 for none) x1 = expdrbx(drb1_fam_1) x2 = expdrbx(drb1_fam_2) + # Create sorted combination code (e.g., '34', '44', '00') xx = "".join(sorted([x1, x2])) + # Handle case where no DRBX genes should be expressed if xx == "00": if drb3 != "": raise DRBXBlenderError("DRB3", "none") @@ -45,17 +68,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): raise DRBXBlenderError("DRB5", "none") return "" - # handle 03 + # Handle heterozygous case: one allele expresses DRB3, other doesn't if xx == "03": if drb4 != "": raise DRBXBlenderError("DRB4", "none") if drb5 != "": raise DRBXBlenderError("DRB5", "none") if drb3 != "": - # if 2 copies + # Check if DRB3 has two copies (homozygous DRB3-expressing alleles) drb3_g = drb3.split("+") if len(drb3_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb3_g[1] == drb3_g[0]: return drb3_g[0] else: @@ -64,17 +87,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): return drb3 return "" - # handle 04 + # Handle heterozygous case: one allele expresses DRB4, other doesn't if xx == "04": if drb3 != "": raise DRBXBlenderError("DRB3", "none") if drb5 != "": raise DRBXBlenderError("DRB5", "none") if drb4 != "": - # if 2 copies + # Check if DRB4 has 
two copies (homozygous DRB4-expressing alleles) drb4_g = drb4.split("+") if len(drb4_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb4_g[1] == drb4_g[0]: return drb4_g[0] else: @@ -83,17 +106,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): return drb4 return "" - # handle 05 + # Handle heterozygous case: one allele expresses DRB5, other doesn't if xx == "05": if drb3 != "": raise DRBXBlenderError("DRB3", "none") if drb4 != "": raise DRBXBlenderError("DRB4", "none") if drb5 != "": - # if 2 copies + # Check if DRB5 has two copies (homozygous DRB5-expressing alleles) drb5_g = drb5.split("+") if len(drb5_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb5_g[1] == drb5_g[0]: return drb5_g[0] else: @@ -101,7 +124,7 @@ def blender(drb1, drb3="", drb4="", drb5=""): else: return drb5 return "" - # handle 33 + # Handle homozygous DRB3-expressing case if xx == "33": if drb4 != "": raise DRBXBlenderError("DRB4", "none") @@ -111,7 +134,7 @@ def blender(drb1, drb3="", drb4="", drb5=""): return drb3 return "" - # handle 44 + # Handle homozygous DRB4-expressing case if xx == "44": if drb3 != "": raise DRBXBlenderError("DRB3", "none") @@ -121,7 +144,7 @@ def blender(drb1, drb3="", drb4="", drb5=""): return drb4 return "" - # handle 55 + # Handle homozygous DRB5-expressing case if xx == "55": if drb3 != "": raise DRBXBlenderError("DRB3", "none") @@ -131,17 +154,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): return drb5 return "" - # handle 34 + # Handle heterozygous case: one allele expresses DRB3, other expresses DRB4 if xx == "34": if drb5 != "": raise DRBXBlenderError("DRB5", "none") retg = [] if drb3 != "": - # if 2 copies + # Process DRB3 typing drb3_g = drb3.split("+") if len(drb3_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb3_g[1] == drb3_g[0]: retg.append(drb3_g[0]) else: @@ -149,10 +172,10 @@ def blender(drb1, drb3="", drb4="", drb5=""): elif len(drb3_g) == 1: 
retg.append(drb3_g[0]) if drb4 != "": - # if 2 copies + # Process DRB4 typing drb4_g = drb4.split("+") if len(drb4_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb4_g[1] == drb4_g[0]: retg.append(drb4_g[0]) else: @@ -162,17 +185,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): return "+".join(retg) - # handle 35 + # Handle heterozygous case: one allele expresses DRB3, other expresses DRB5 if xx == "35": if drb4 != "": raise DRBXBlenderError("DRB4", "none") retg = [] if drb3 != "": - # if 2 copies + # Process DRB3 typing drb3_g = drb3.split("+") if len(drb3_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb3_g[1] == drb3_g[0]: retg.append(drb3_g[0]) else: @@ -180,10 +203,10 @@ def blender(drb1, drb3="", drb4="", drb5=""): elif len(drb3_g) == 1: retg.append(drb3_g[0]) if drb5 != "": - # if 2 copies + # Process DRB5 typing drb5_g = drb5.split("+") if len(drb5_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb5_g[1] == drb5_g[0]: retg.append(drb5_g[0]) else: @@ -193,17 +216,17 @@ def blender(drb1, drb3="", drb4="", drb5=""): return "+".join(retg) - # handle 45 + # Handle heterozygous case: one allele expresses DRB4, other expresses DRB5 if xx == "45": if drb3 != "": raise DRBXBlenderError("DRB3", "none") retg = [] if drb4 != "": - # if 2 copies + # Process DRB4 typing drb4_g = drb4.split("+") if len(drb4_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb4_g[1] == drb4_g[0]: retg.append(drb4_g[0]) else: @@ -211,10 +234,10 @@ def blender(drb1, drb3="", drb4="", drb5=""): elif len(drb4_g) == 1: retg.append(drb4_g[0]) if drb5 != "": - # if 2 copies + # Process DRB5 typing drb5_g = drb5.split("+") if len(drb5_g) == 2: - # homozygous, return one copy + # If homozygous, return one copy if drb5_g[1] == drb5_g[0]: retg.append(drb5_g[0]) else: @@ -229,17 +252,45 @@ def blender(drb1, drb3="", drb4="", drb5=""): def expdrbx(drb1_fam): + """Map DRB1 
allele family to expected DRBX gene expression + + Different DRB1 allele families are associated with expression of + different DRBX genes based on linkage disequilibrium patterns. + + Args: + drb1_fam: DRB1 allele family (first field, e.g., '03', '04', '15') + + Returns: + String indicating expected DRBX gene: + '3' for DRB3, '4' for DRB4, '5' for DRB5, '0' for none + """ + # DRB1 families associated with DRB3 expression if drb1_fam in ["03", "05", "06", "11", "12", "13", "14"]: return "3" + # DRB1 families associated with DRB4 expression if drb1_fam in ["04", "07", "09"]: return "4" + # DRB1 families associated with DRB5 expression if drb1_fam in ["02", "15", "16"]: return "5" + # DRB1 families with no associated DRBX expression return "0" class DRBXBlenderError(Exception): + """Exception raised when DRBX typing doesn't match expected pattern + + This error occurs when the provided DRB3/4/5 typing is inconsistent + with what should be expressed based on the DRB1 allele families. + """ + def __init__(self, found, expected): + """Initialize the error with found and expected values + + Args: + found: What was actually provided in the typing + expected: What should have been provided based on DRB1 + """ self.found = found self.expected = expected diff --git a/pyard/misc.py b/pyard/misc.py index 9fc4f6c..13a3cee 100644 --- a/pyard/misc.py +++ b/pyard/misc.py @@ -37,16 +37,31 @@ def get_n_field_allele(allele: str, n: int, preserve_expression=False) -> str: :param preserve_expression: keep the expression character ? 
:return: trimmed to n fields of the original allele """ + # Check if allele ends with expression character (N, L, S, C, A, Q) last_char = allele[-1] fields = allele.split(":") + # Preserve expression character if requested and present, and we're reducing fields if preserve_expression and last_char in expression_chars and len(fields) > n: return ":".join(fields[0:n]) + last_char else: + # Standard field reduction without expression character return ":".join(fields[0:n]) def get_3field_allele(a: str) -> str: + """Reduce allele to 3 fields, removing P/G group suffixes + + Converts alleles like 'A*01:01:01:01G' to 'A*01:01:01' + by removing P/G group indicators and reducing to 3 fields. + + Args: + a: HLA allele string + + Returns: + 3-field allele without P/G suffixes + """ last_char = a[-1] + # Remove P or G group suffixes before field reduction if last_char in P_and_G_chars: a = a[:-1] @@ -54,65 +69,159 @@ def get_3field_allele(a: str) -> str: def get_2field_allele(a: str) -> str: + """Reduce allele to 2 fields, removing P/G group suffixes + + Converts alleles like 'A*01:01:01:01G' to 'A*01:01' + by removing P/G group indicators and reducing to 2 fields. + + Args: + a: HLA allele string + + Returns: + 2-field allele without P/G suffixes + """ last_char = a[-1] + # Remove P or G group suffixes before field reduction if last_char in P_and_G_chars: a = a[:-1] return get_n_field_allele(a, 2) def get_1field_allele(a: str) -> str: + """Reduce allele to 1 field (locus and allele group only) + + Converts alleles like 'A*01:01:01:01' to 'A*01' + + Args: + a: HLA allele string + + Returns: + 1-field allele (locus*group) + """ return get_n_field_allele(a, 1) def number_of_fields(allele: str) -> int: + """Count the number of fields in an HLA allele + + Fields are separated by colons in HLA nomenclature. 
+ + Args: + allele: HLA allele string + + Returns: + Number of colon-separated fields + """ return len(allele.split(":")) def is_2_field_allele(allele: str) -> bool: + """Check if allele has exactly 2 fields + + Args: + allele: HLA allele string + + Returns: + True if allele has exactly 2 colon-separated fields + """ return number_of_fields(allele) == 2 -# computes a valid G name based on the ambiguity string def get_G_name(a: str) -> str: + """Compute a valid G group name from an allele or ambiguity string + + G groups represent alleles with identical exon 2 and 3 sequences. + This function creates a standardized G group name by taking the first + allele from ambiguous strings and formatting it appropriately. + + Args: + a: Allele or ambiguous allele string + + Returns: + Standardized G group name with 'G' suffix + """ + # Take first allele if ambiguous (contains '/') a = a.split("/")[0] last_char = a[-1] + # Remove existing P/G group or expression suffixes if last_char in P_and_G_chars + expression_chars: a = a[:-1] + # For 2-field alleles, add '01' as third field before 'G' suffix if len(a.split(":")) == 2: return ":".join([a, "01"]) + "G" else: + # For 3+ field alleles, use first 3 fields with 'G' suffix return ":".join(a.split(":")[0:3]) + "G" -# computes a valid P name based on the ambiguity string def get_P_name(a: str) -> str: + """Compute a valid P group name from an allele or ambiguity string + + P groups represent alleles with identical protein sequences. + This function creates a standardized P group name using the first + two fields of the allele. 
+ + Args: + a: Allele or ambiguous allele string + + Returns: + Standardized P group name with 'P' suffix + """ + # Take first allele if ambiguous (contains '/') a = a.split("/")[0] last_char = a[-1] + # Remove existing P/G group or expression suffixes if last_char in P_and_G_chars + expression_chars: a = a[:-1] + # Use first 2 fields with 'P' suffix return ":".join(a.split(":")[0:2]) + "P" def get_imgt_db_versions() -> List[str]: + """Fetch available IPD-IMGT/HLA database versions from GitHub + + Queries the ANHIG/IMGTHLA repository to get all available branch names, + which correspond to different database versions. + + Returns: + List of available database version names + + Raises: + Network errors if GitHub API is unreachable + """ import urllib.request import json + # Query GitHub API for IMGT/HLA repository branches req = urllib.request.Request( url="https://api.github.com/repos/ANHIG/IMGTHLA/branches?per_page=100" ) res = urllib.request.urlopen(req, timeout=5) if res.status == 200: json_body = json.loads(res.read()) + # Extract branch names as version identifiers versions = list(map(lambda x: x["name"], json_body)) return versions def download_to_file(url: str, local_filename: str): + """Download content from URL and save to local file + + Downloads text content from a URL and writes it to a local file. + Used for fetching IMGT/HLA database files and other resources. 
+ + Args: + url: URL to download from + local_filename: Local file path to save content + + Prints error message if download fails + """ import urllib.request req = urllib.request.Request(url) res = urllib.request.urlopen(req, timeout=5) if res.status == 200: + # Decode content as UTF-8 text and write to file file_content = res.read().decode("utf-8") with open(local_filename, "wt") as f: f.write(file_content) @@ -121,18 +230,49 @@ def download_to_file(url: str, local_filename: str): def get_data_dir(data_dir): + """Validate and return data directory path + + Validates the provided data directory or returns the default directory + if none is specified. Ensures the directory exists and is accessible. + + Args: + data_dir: User-specified data directory path or None + + Returns: + Validated pathlib.Path object for data directory + + Raises: + RuntimeError: If specified directory doesn't exist or isn't a directory + """ if data_dir: path = pathlib.Path(data_dir) + # Validate that the specified path exists and is a directory if not path.exists() or not path.is_dir(): raise RuntimeError(f"{data_dir} is not a valid directory") data_dir = path else: + # Use default directory if none specified data_dir = get_default_db_directory() return data_dir def get_imgt_version(imgt_version): + """Validate and normalize IMGT database version + + Converts version strings like '3.51.0' to '3510' format used internally. + Returns 'Latest' if no version is specified. 
+ + Args: + imgt_version: Version string (e.g., '3.51.0') or None + + Returns: + Normalized version string ('3510') or 'Latest' + + Raises: + RuntimeError: If version format is invalid + """ if imgt_version: + # Remove dots and validate that result is numeric version = imgt_version.replace(".", "") if version.isdigit(): return version @@ -143,13 +283,35 @@ def get_imgt_version(imgt_version): def get_default_db_directory(): + """Get the default directory for py-ard database files + + Builds the path of a user-specific directory under the system temp directory + (for SQLite database files and cached data); the directory is not created here. + + Returns: + pathlib.Path object for default database directory + """ try: + # Get current username for directory naming username = getpass.getuser() except OSError: + # Fallback if username cannot be determined username = "nonuser" + # Create path in system temp directory with user-specific name return pathlib.Path(tempfile.gettempdir()) / f"pyard-{username}" def validate_reduction_type(ars_type): + """Validate that reduction type is supported + + Checks that the provided reduction type is one of the valid options + supported by py-ard (G, P, lg, lgx, W, exon, U2, S). 
+ + Args: + ars_type: Reduction type string to validate + + Raises: + ValueError: If reduction type is not supported + """ if ars_type not in VALID_REDUCTION_MODES: raise ValueError(f"Reduction type needs to be one of {VALID_REDUCTION_MODES}") diff --git a/pyard/serology.py b/pyard/serology.py index b2a5bf5..7efdee5 100644 --- a/pyard/serology.py +++ b/pyard/serology.py @@ -38,54 +38,71 @@ # # Mapping Generated from `dna_relshp.csv` file # +# Mapping of broad antigens to their split antigens at the DNA/molecular level +# Broad antigens are general serological designations that were later found +# to represent multiple distinct molecular alleles (splits) broad_splits_dna_mapping = { - "A*09": ["A*23", "A*24"], - "A*10": ["A*25", "A*26", "A*34", "A*66"], - "A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"], - "A*28": ["A*68", "A*69"], - "B*05": ["B*51", "B*52"], - "B*12": ["B*44", "B*45"], - "B*16": ["B*38", "B*39"], - "B*17": ["B*57", "B*58"], - "B*21": ["B*49", "B*50"], - "B*22": ["B*54", "B*55", "B*56"], - "C*10": ["C*03", "C*04"], - "DQB1*01": ["DQB1*05", "DQB1*06"], - "DRB1*02": ["DRB1*15", "DRB1*16"], - "DRB1*06": ["DRB1*13", "DRB1*14"], + "A*09": ["A*23", "A*24"], # A9 broad splits into A23, A24 + "A*10": ["A*25", "A*26", "A*34", "A*66"], # A10 broad splits + "A*19": ["A*29", "A*30", "A*31", "A*32", "A*33", "A*74"], # A19 broad splits + "A*28": ["A*68", "A*69"], # A28 broad splits into A68, A69 + "B*05": ["B*51", "B*52"], # B5 broad splits into B51, B52 + "B*12": ["B*44", "B*45"], # B12 broad splits into B44, B45 + "B*16": ["B*38", "B*39"], # B16 broad splits into B38, B39 + "B*17": ["B*57", "B*58"], # B17 broad splits into B57, B58 + "B*21": ["B*49", "B*50"], # B21 broad splits into B49, B50 + "B*22": ["B*54", "B*55", "B*56"], # B22 broad splits + "C*10": ["C*03", "C*04"], # Cw10 broad splits into Cw3, Cw4 + "DQB1*01": ["DQB1*05", "DQB1*06"], # DQ1 broad splits into DQ5, DQ6 + "DRB1*02": ["DRB1*15", "DRB1*16"], # DR2 broad splits into DR15, DR16 + 
"DRB1*06": ["DRB1*13", "DRB1*14"], # DR6 broad splits into DR13, DR14 } +# Special mappings for serologies that don't follow standard XX code patterns +# These serologies map to different molecular families than their numeric designation suggests serology_xx_exception_mapping = { # Locus B - # Broad B40 + # Broad B40 - these serologies map to B*40 family despite different numbers "B60": "B*40:XX", "B61": "B*40:XX", - # Broad B14 + # Broad B14 - these serologies map to B*14 family "B64": "B*14:XX", "B65": "B*14:XX", - # Broad B15 + # Broad B15 - these serologies map to B*15 family despite different numbers "B62": "B*15:XX", "B63": "B*15:XX", "B70": "B*15:XX", "B75": "B*15:XX", "B76": "B*15:XX", "B77": "B*15:XX", - # Broad B70 + # Broad B70 - these also map to B*15 family "B71": "B*15:XX", "B72": "B*15:XX", + # DR17/18 are splits of DR3 "DR17": "DRB1*03:XX", "DR18": "DRB1*03:XX", # Locus DQB1 - # Broad DQ3 + # Broad DQ3 - these DQ serologies map to DQB1*03 family "DQ7": "DQB1*03:XX", "DQ8": "DQB1*03:XX", "DQ9": "DQB1*03:XX", } +# Regular expression to separate locus letters from antigen numbers in serology +# Matches non-digit characters followed by digits (e.g., 'A1' -> 'A' and '1') sero_antigen_regex = re.compile(r"(\D+)(\d+)") class SerologyMapping: + """Handles mapping between serological and molecular HLA typing + + This class manages the relationships between historical serological + designations and modern molecular typing, including broad/split + relationships and XX code mappings. 
+ """ + + # Complete list of recognized serological designations by locus + # Based on official WHO nomenclature committee recognized serology valid_serology_map = { "A": [ "A1", @@ -241,10 +258,29 @@ class SerologyMapping: } def __init__(self, broad_splits_mapping, associated_mapping): + """Initialize SerologyMapping with broad/splits and associated antigen mappings + + Args: + broad_splits_mapping: Dictionary mapping broad antigens to their splits + associated_mapping: Dictionary mapping serologies to associated antigens + """ self.broad_splits_map = broad_splits_mapping self.serology_associated_map = associated_mapping def find_splits(self, allele: str) -> tuple: + """Find broad/split relationships for a given allele or serology + + Determines if the input is a broad antigen (returns its splits) or + a split antigen (returns its broad). Handles both serological and + molecular designations. + + Args: + allele: Serological or molecular designation + + Returns: + Tuple of (broad, [splits]) or an empty tuple if no relationship found + """ + # Check if input has HLA- prefix and extract the core designation if HLA_regex.search(allele): prefix = True allele_name = allele.split("-")[1] @@ -252,23 +288,48 @@ def find_splits(self, allele: str) -> tuple: prefix = False allele_name = allele + # Choose appropriate mapping based on molecular (*) vs serological format if "*" in allele_name: - mapping = broad_splits_dna_mapping + mapping = broad_splits_dna_mapping # Use DNA-level mappings else: - mapping = self.broad_splits_map + mapping = self.broad_splits_map # Use serological mappings + # Check if input is a broad antigen if allele_name in mapping: return self._get_mapping(allele_name, mapping, prefix) + # Check if input is a split antigen (find its broad) for broad in mapping: if allele_name in mapping[broad]: return self._get_mapping(broad, mapping, prefix) + return tuple() # No relationship found def find_associated_antigen(self, serology): + """Find the associated antigen for a 
given serology + + Some serological designations have associated or equivalent antigens. + Returns the associated antigen if one exists, otherwise returns the + original serology. + + Args: + serology: Serological designation + + Returns: + Associated antigen or original serology if no association exists + """ return self.serology_associated_map.get(serology, serology) def get_xx_mappings(self): + """Generate XX code mappings for all valid serologies + + Creates a comprehensive mapping from serological designations to + their corresponding XX codes (broad molecular groupings). + + Returns: + Dictionary mapping serology names to XX codes + """ all_xx_mappings = {} + # Process each locus and its serologies for locus, serologies in SerologyMapping.valid_serology_map.items(): xx_mapping = { serology: self._map_serology_to_xx(locus, serology) @@ -279,29 +340,66 @@ def get_xx_mappings(self): @classmethod def get_valid_serology_names(cls): + """Get set of all valid serological designation names + + Flattens the valid_serology_map to create a single set containing + all recognized serological designations across all loci. + + Returns: + Set of all valid serology names + """ all_serology_names = {x for v in cls.valid_serology_map.values() for x in v} return all_serology_names def _map_serology_to_xx(self, locus, serology): + """Map a serology to its corresponding XX code + + Converts serological designations to XX codes, which represent + broad molecular groupings. Handles special exception cases and + standard numeric conversions. + + Args: + locus: HLA locus (A, B, C, DRB1, etc.) 
+ serology: Serological designation + + Returns: + XX code string (e.g., 'A*01:XX', 'B*27:XX') + """ + # Check for special exception mappings first if serology in serology_xx_exception_mapping.keys(): return serology_xx_exception_mapping[serology] - # Use the associated serology for XX version + # Use the associated serology for XX version (handles equivalencies) serology = self.find_associated_antigen(serology) - # Extract just the digits + # Extract the numeric part from serology (e.g., '27' from 'B27') antigen_group = sero_antigen_regex.match(serology).group(2) - # Pad numbers with 0 for single digit numbers + # Pad single digit numbers with leading zero for consistency antigen_group_num = int(antigen_group) if antigen_group_num < 10: antigen_group = f"{antigen_group_num:02}" - # Build the XX allele + # Build the XX allele in standard format return f"{locus}*{antigen_group}:XX" @classmethod def _get_mapping(cls, broad, mapping, prefix): + """Format broad/split mapping results with appropriate prefixes + + Adds 'HLA-' prefix to results if the original input had this prefix, + maintaining consistent formatting. 
+ + Args: + broad: Broad antigen designation + mapping: Dictionary containing broad to splits mappings + prefix: Whether to add 'HLA-' prefix to results + + Returns: + Tuple of (broad, [splits]) with appropriate prefixes + """ if prefix: + # Add HLA- prefix to both broad and all splits return "HLA-" + broad, list(map(lambda x: "HLA-" + x, mapping[broad])) else: + # Return without prefix return broad, mapping[broad] diff --git a/pyard/smart_sort.py b/pyard/smart_sort.py index c94aa4b..ef8d05c 100644 --- a/pyard/smart_sort.py +++ b/pyard/smart_sort.py @@ -26,65 +26,82 @@ from pyard import constants -expr_regex = re.compile("[PNQLSGg]") -glstring_chars = re.compile("[/|+^~]") -serology_splitter = re.compile(r"(\D+)(\d+)") +# Regular expressions for parsing HLA nomenclature components +expr_regex = re.compile( + "[PNQLSGg]" +) # Expression characters (P/G groups, null alleles, etc.) +glstring_chars = re.compile("[/|+^~]") # GL string delimiter characters +serology_splitter = re.compile( + r"(\D+)(\d+)" +) # Separates locus letters from numbers in serology @functools.lru_cache(maxsize=constants.DEFAULT_CACHE_SIZE) def smart_sort_comparator(a1, a2, ignore_suffixes=()): """ - Natural sort 2 given alleles. - - Python sorts strings lexicographically but HLA alleles need - to be sorted by numerical values in each field of the HLA nomenclature. - - If allele suffixes are in ignore_suffixes, comparison results in that - appearing later. - - :param a1: first allele - :param a2: second allele - :param ignore_suffix: tuple of suffixes + Natural sort 2 given alleles using HLA nomenclature rules. + + Python's default lexicographic sorting doesn't work correctly for HLA alleles + because it treats field values as strings rather than numbers. This function + implements proper numerical sorting for each field in HLA nomenclature. + + Sorting hierarchy: + 1. Handle identical alleles + 2. Handle GL string delimiters (lexicographic fallback) + 3. 
Handle ignored suffixes (push to end) + 4. Handle serological designations (numeric comparison) + 5. Handle molecular alleles (field-by-field numeric comparison) + + :param a1: first allele to compare + :param a2: second allele to compare + :param ignore_suffixes: tuple of allele suffixes to sort last + :return: -1 if a1 < a2, 0 if equal, 1 if a1 > a2 """ - # Check to see if they are the same alleles + # Quick equality check - identical alleles are equal if a1 == a2: return 0 - # GL String matches + # Handle GL string delimiters - fall back to lexicographic sorting + # GL strings with delimiters (/|+^~) are complex and sorted lexicographically if re.search(glstring_chars, a1) or re.search(glstring_chars, a2): if a1 > a2: return 1 else: return -1 + # Handle ignored suffixes - push alleles with these suffixes to the end + # This allows certain allele types to be sorted last (e.g., expression variants) if ignore_suffixes and "*" in a1: _, fields = a1.split("*") if fields in ignore_suffixes: - return 1 + return 1 # a1 comes after a2 if ignore_suffixes and "*" in a2: _, fields = a2.split("*") if fields in ignore_suffixes: - return -1 + return -1 # a2 comes after a1 - # remove any non-numerics + # Remove expression characters (P, N, Q, L, S, G, g) for comparison + # This normalizes alleles like 'A*01:01N' to 'A*01:01' for sorting a1 = re.sub(expr_regex, "", a1) a2 = re.sub(expr_regex, "", a2) - # Check to see if they are still the same alleles + # Check equality again after removing expression characters if a1 == a2: return 0 - # Handle serology + # Handle serological designations (no colon separator) + # Compare numeric parts of serology (e.g., '27' in 'B27') if ":" not in a1: serology1_match = serology_splitter.match(a1) - serology1_num = int(serology1_match.group(2)) + serology1_num = int(serology1_match.group(2)) # Extract numeric part serology2_match = serology_splitter.match(a2) - serology2_num = int(serology2_match.group(2)) + serology2_num = 
int(serology2_match.group(2)) # Extract numeric part return 1 if serology1_num > serology2_num else -1 - # Extract and Compare 1st fields first + # Compare first field (allele group) numerically + # Extract numbers between '*' and first ':' (e.g., '01' from 'A*01:01') a1_f1 = int(a1[a1.find("*") + 1 : a1.find(":")]) a2_f1 = int(a2[a2.find("*") + 1 : a2.find(":")]) @@ -93,10 +110,11 @@ def smart_sort_comparator(a1, a2, ignore_suffixes=()): if a1_f1 > a2_f1: return 1 + # Split alleles into fields for detailed comparison a1_fields = a1.split(":") a2_fields = a2.split(":") - # If the first fields are equal, try the 2nd fields + # Compare second field (protein variation) numerically a1_f2 = int(a1_fields[1]) a2_f2 = int(a2_fields[1]) @@ -105,47 +123,51 @@ def smart_sort_comparator(a1, a2, ignore_suffixes=()): if a1_f2 > a2_f2: return 1 - # If the second fields are equal, try the 3rd fields + # Compare third field (synonymous DNA variation) numerically + # Handle missing fields or non-numeric values gracefully if len(a1_fields) > 2: try: a1_f3 = int(a1_fields[2]) except ValueError: - a1_f3 = 0 + a1_f3 = 0 # Non-numeric third field treated as 0 else: - a1_f3 = 0 + a1_f3 = 0 # Missing third field treated as 0 + if len(a2_fields) > 2: try: a2_f3 = int(a2_fields[2]) except ValueError: - a2_f3 = 0 + a2_f3 = 0 # Non-numeric third field treated as 0 else: - a2_f3 = 0 + a2_f3 = 0 # Missing third field treated as 0 if a1_f3 < a2_f3: return -1 if a1_f3 > a2_f3: return 1 - # If the third fields are equal, try the 4th fields + # Compare fourth field (non-coding variation) numerically + # Handle missing fields or non-numeric values gracefully if len(a1_fields) > 3: try: a1_f4 = int(a1_fields[3]) except ValueError: - a1_f4 = 0 + a1_f4 = 0 # Non-numeric fourth field treated as 0 else: - a1_f4 = 0 + a1_f4 = 0 # Missing fourth field treated as 0 + if len(a2_fields) > 3: try: a2_f4 = int(a2_fields[3]) except ValueError: - a2_f4 = 0 + a2_f4 = 0 # Non-numeric fourth field treated as 0 else: 
- a2_f4 = 0 + a2_f4 = 0 # Missing fourth field treated as 0 if a1_f4 < a2_f4: return -1 if a1_f4 > a2_f4: return 1 - # All fields are considered equal after 4th field + # All compared fields are equal - alleles are considered equivalent for sorting return 0 From 2437d686acd704e9cdadd72bd3483e2f755a1121 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 3 Nov 2025 15:45:07 -0600 Subject: [PATCH 21/24] Handler Tests - Add tests for all handlers --- tests/unit/handlers/__init__.py | 1 + tests/unit/handlers/test_allele_handler.py | 73 +++++++ tests/unit/handlers/test_gl_string_handler.py | 150 +++++++++++++++ tests/unit/handlers/test_mac_handler.py | 180 ++++++++++++++++++ tests/unit/handlers/test_serology_handler.py | 132 +++++++++++++ tests/unit/handlers/test_shortnull_handler.py | 124 ++++++++++++ tests/unit/handlers/test_v2_handler.py | 178 +++++++++++++++++ tests/unit/handlers/test_xx_handler.py | 111 +++++++++++ 8 files changed, 949 insertions(+) create mode 100644 tests/unit/handlers/__init__.py create mode 100644 tests/unit/handlers/test_allele_handler.py create mode 100644 tests/unit/handlers/test_gl_string_handler.py create mode 100644 tests/unit/handlers/test_mac_handler.py create mode 100644 tests/unit/handlers/test_serology_handler.py create mode 100644 tests/unit/handlers/test_shortnull_handler.py create mode 100644 tests/unit/handlers/test_v2_handler.py create mode 100644 tests/unit/handlers/test_xx_handler.py diff --git a/tests/unit/handlers/__init__.py b/tests/unit/handlers/__init__.py new file mode 100644 index 0000000..40a96af --- /dev/null +++ b/tests/unit/handlers/__init__.py @@ -0,0 +1 @@ +# -*- coding: utf-8 -*- diff --git a/tests/unit/handlers/test_allele_handler.py b/tests/unit/handlers/test_allele_handler.py new file mode 100644 index 0000000..80b40c2 --- /dev/null +++ b/tests/unit/handlers/test_allele_handler.py @@ -0,0 +1,73 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, MagicMock + +from 
pyard.handlers.allele_handler import AlleleHandler + + +class TestAlleleHandler: + """Test cases for AlleleHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard._config = {"ARS_as_lg": False} + return ard + + @pytest.fixture + def mock_strategy_factory(self): + """Create mock strategy factory""" + factory = Mock() + mock_strategy = Mock() + mock_strategy.reduce.return_value = "A*01:01G" + factory.get_strategy.return_value = mock_strategy + return factory + + @pytest.fixture + def allele_handler(self, mock_ard, mock_strategy_factory): + """Create AlleleHandler instance with mocked dependencies""" + handler = AlleleHandler(mock_ard) + handler.strategy_factory = mock_strategy_factory + return handler + + def test_init(self, mock_ard): + """Test AlleleHandler initialization""" + handler = AlleleHandler(mock_ard) + assert handler.ard == mock_ard + assert handler.strategy_factory is not None + + def test_reduce_allele(self, allele_handler, mock_strategy_factory): + """Test reduce_allele method""" + result = allele_handler.reduce_allele("A*01:01:01", "G") + + mock_strategy_factory.get_strategy.assert_called_once_with("G") + mock_strategy_factory.get_strategy.return_value.reduce.assert_called_once_with( + "A*01:01:01" + ) + assert result == "A*01:01G" + + def test_add_lg_suffix_single_allele_default(self, allele_handler): + """Test add_lg_suffix with single allele using default 'g' suffix""" + result = allele_handler.add_lg_suffix("A*01:01") + assert result == "A*01:01g" + + def test_add_lg_suffix_single_allele_ars(self, mock_ard): + """Test add_lg_suffix with single allele using ARS suffix""" + mock_ard._config = {"ARS_as_lg": True} + handler = AlleleHandler(mock_ard) + result = handler.add_lg_suffix("A*01:01") + assert result == "A*01:01ARS" + + def test_add_lg_suffix_multiple_alleles(self, allele_handler): + """Test add_lg_suffix with multiple alleles separated by '/'""" + result = 
allele_handler.add_lg_suffix("A*01:01/A*01:02") + assert result == "A*01:01g/A*01:02g" + + def test_add_lg_suffix_multiple_alleles_ars(self, mock_ard): + """Test add_lg_suffix with multiple alleles using ARS suffix""" + mock_ard._config = {"ARS_as_lg": True} + handler = AlleleHandler(mock_ard) + result = handler.add_lg_suffix("A*01:01/A*01:02") + assert result == "A*01:01ARS/A*01:02ARS" diff --git a/tests/unit/handlers/test_gl_string_handler.py b/tests/unit/handlers/test_gl_string_handler.py new file mode 100644 index 0000000..7025467 --- /dev/null +++ b/tests/unit/handlers/test_gl_string_handler.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock + +from pyard.handlers.gl_string_processor import GLStringHandler +from pyard.exceptions import InvalidAlleleError + + +class TestGLStringHandler: + """Test cases for GLStringHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + + def redux_side_effect(x, y): + # Mock the redux method to return the input string + return x + + def sort_comparator(a, b, ignore_suffixes=()): + # Simple lexicographic comparison for consistent sorting + if a < b: + return -1 + elif a > b: + return 1 + else: + return 0 + + ard = Mock() + ard._config = {"strict": False, "ignore_allele_with_suffixes": ()} + ard.redux.side_effect = redux_side_effect + ard.smart_sort_comparator.side_effect = sort_comparator + ard._is_valid.return_value = True + return ard + + @pytest.fixture + def gl_handler(self, mock_ard): + """Create GLStringHandler instance""" + return GLStringHandler(mock_ard) + + def test_init(self, mock_ard): + """Test GLStringHandler initialization""" + handler = GLStringHandler(mock_ard) + assert handler.ard == mock_ard + + def test_process_gl_string_single_allele(self, gl_handler): + """Test processing single allele (no delimiters)""" + result = gl_handler.process_gl_string("A*01:01", "G") + assert result == "A*01:01" + + def 
test_process_gl_string_caret_delimiter(self, gl_handler): + """Test processing GL string with ^ delimiter""" + result = gl_handler.process_gl_string("A*01:01^B*07:02", "G") + # Results are sorted - check that both components are present + assert "A*01:01" in result and "B*07:02" in result and "^" in result + + def test_process_gl_string_pipe_delimiter(self, gl_handler): + """Test processing GL string with | delimiter""" + result = gl_handler.process_gl_string("A*01:01|B*07:02", "G") + # Results are sorted - check that both components are present + assert "A*01:01" in result and "B*07:02" in result and "|" in result + + def test_process_gl_string_plus_delimiter(self, gl_handler): + """Test processing GL string with + delimiter""" + result = gl_handler.process_gl_string("A*01:01+A*02:01", "G") + assert result == "A*01:01+A*02:01" + + def test_process_gl_string_tilde_delimiter(self, gl_handler): + """Test processing GL string with ~ delimiter""" + result = gl_handler.process_gl_string("A*01:01~A*02:01", "G") + assert result == "A*01:01~A*02:01" + + def test_process_gl_string_slash_delimiter(self, gl_handler): + """Test processing GL string with / delimiter""" + result = gl_handler.process_gl_string("A*01:01/A*02:01", "lgx") + # Results are sorted - the actual sorting puts A*01:01 before A*02:01 + assert result == "A*01:01/A*02:01" + + def test_process_gl_string_strict_mode_valid(self, mock_ard): + """Test processing with strict mode enabled and valid alleles""" + mock_ard._config["strict"] = True + handler = GLStringHandler(mock_ard) + result = handler.process_gl_string("A*01:01", "G") + assert result == "A*01:01" + mock_ard._is_valid.assert_called_once_with("A*01:01") + + def test_process_gl_string_strict_mode_invalid(self, mock_ard): + """Test processing with strict mode enabled and invalid alleles""" + mock_ard._config["strict"] = True + mock_ard._is_valid.return_value = False + handler = GLStringHandler(mock_ard) + + with pytest.raises(InvalidAlleleError): + 
handler.process_gl_string("INVALID", "G") + + def test_sorted_unique_gl_tilde_preserves_order(self, gl_handler): + """Test that ~ delimiter preserves original order""" + result = gl_handler._sorted_unique_gl(["B*07:02", "A*01:01"], "~") + assert result == "B*07:02~A*01:01" + + def test_sorted_unique_gl_plus_sorts(self, gl_handler): + """Test that + delimiter sorts components""" + result = gl_handler._sorted_unique_gl(["B*07:02", "A*01:01"], "+") + assert "+" in result + + def test_sorted_unique_gl_other_delimiters_flatten(self, gl_handler): + """Test that other delimiters flatten and deduplicate""" + result = gl_handler._sorted_unique_gl(["A*01:01/A*02:01", "A*01:01"], "/") + # Should flatten and deduplicate + assert "/" in result + + def test_validate_gl_string_single_valid_allele(self, gl_handler): + """Test validation of single valid allele""" + result = gl_handler.validate_gl_string("A*01:01") + assert result is True + + def test_validate_gl_string_single_invalid_allele(self, mock_ard): + """Test validation of single invalid allele""" + mock_ard._is_valid.return_value = False + handler = GLStringHandler(mock_ard) + + with pytest.raises(InvalidAlleleError): + handler.validate_gl_string("INVALID") + + def test_validate_gl_string_with_delimiters(self, gl_handler): + """Test validation of GL string with delimiters""" + result = gl_handler.validate_gl_string("A*01:01^B*07:02") + assert result is True + + def test_validate_gl_string_mixed_valid_invalid(self, mock_ard): + """Test validation with mix of valid and invalid alleles""" + mock_ard._is_valid.side_effect = lambda x: x != "INVALID" + handler = GLStringHandler(mock_ard) + + with pytest.raises(InvalidAlleleError): + handler.validate_gl_string("A*01:01^INVALID") + + @pytest.mark.parametrize( + "redux_type", ["G", "P", "lg", "lgx", "W", "exon", "U2", "S"] + ) + def test_process_gl_string_valid_redux_types(self, gl_handler, redux_type): + """Test processing with all valid reduction types""" + result = 
gl_handler.process_gl_string("A*01:01", redux_type) + assert result == "A*01:01" + + def test_process_gl_string_invalid_redux_type(self, gl_handler): + """Test processing with invalid reduction type""" + with pytest.raises(ValueError): + gl_handler.process_gl_string("A*01:01", "INVALID") diff --git a/tests/unit/handlers/test_mac_handler.py b/tests/unit/handlers/test_mac_handler.py new file mode 100644 index 0000000..eca5c2d --- /dev/null +++ b/tests/unit/handlers/test_mac_handler.py @@ -0,0 +1,180 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch +import sqlite3 + +from pyard.handlers.mac_handler import MACHandler +from pyard.exceptions import InvalidMACError + + +class TestMACHandler: + """Test cases for MACHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard.db_connection = Mock() + ard.smart_sort_comparator.return_value = 0 + ard._is_allele_in_db.return_value = True + return ard + + @pytest.fixture + def mac_handler(self, mock_ard): + """Create MACHandler instance""" + return MACHandler(mock_ard) + + def test_init(self, mock_ard): + """Test MACHandler initialization""" + handler = MACHandler(mock_ard) + assert handler.ard == mock_ard + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_is_mac_valid_alphabetic_code(self, mock_mac_to_alleles, mac_handler): + """Test is_mac with valid alphabetic MAC code""" + mock_mac_to_alleles.return_value = ["01:01", "01:02"] + + result = mac_handler.is_mac("A*01:AB") + assert result is True + mock_mac_to_alleles.assert_called_once_with(mac_handler.ard.db_connection, "AB") + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_is_mac_valid_with_antigen_validation( + self, mock_mac_to_alleles, mac_handler + ): + """Test is_mac with antigen group validation""" + mock_mac_to_alleles.return_value = ["01:01", "01:02", "02:01"] + + result = mac_handler.is_mac("A*01:AB") + assert result is True + + def 
test_is_mac_no_colon(self, mac_handler): + """Test is_mac with string without colon""" + result = mac_handler.is_mac("A01AB") + assert result is False + + def test_is_mac_numeric_code(self, mac_handler): + """Test is_mac with numeric code (not alphabetic)""" + result = mac_handler.is_mac("A*01:01") + assert result is False + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_is_mac_database_error(self, mock_mac_to_alleles, mac_handler): + """Test is_mac with database error""" + mock_mac_to_alleles.side_effect = sqlite3.OperationalError("DB Error") + + result = mac_handler.is_mac("A*01:AB") + assert result is False + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_is_mac_no_alleles_found(self, mock_mac_to_alleles, mac_handler): + """Test is_mac when no alleles found for code""" + mock_mac_to_alleles.return_value = [] + + result = mac_handler.is_mac("A*01:AB") + assert result is False + + def test_expand_mac_valid_standard_format(self, mac_handler): + """Test expand_mac with valid standard format MAC""" + with patch.object(mac_handler, "is_mac", return_value=True), patch.object( + mac_handler, "get_alleles", return_value=["A*01:01", "A*01:02"] + ): + result = mac_handler.expand_mac("A*01:AB") + assert result == "A*01:01/A*01:02" + + def test_expand_mac_valid_hla_prefixed(self, mac_handler): + """Test expand_mac with HLA-prefixed format""" + with patch.object(mac_handler, "is_mac", return_value=True), patch.object( + mac_handler, "get_alleles", return_value=["A*01:01", "A*01:02"] + ): + result = mac_handler.expand_mac("HLA-A*01:AB") + assert result == "HLA-A*01:01/HLA-A*01:02" + + def test_expand_mac_invalid(self, mac_handler): + """Test expand_mac with invalid MAC code""" + with patch.object(mac_handler, "is_mac", return_value=False): + with pytest.raises(InvalidMACError): + mac_handler.expand_mac("INVALID") + + @patch("pyard.handlers.mac_handler.db.alleles_to_mac_code") + def test_lookup_mac_single_antigen_group(self, 
mock_alleles_to_mac, mac_handler): + """Test lookup_mac with single antigen group optimization""" + mock_alleles_to_mac.return_value = "AB" + + result = mac_handler.lookup_mac("A*01:01/A*01:02") + assert result == "A*01:AB" + mock_alleles_to_mac.assert_called_once_with( + mac_handler.ard.db_connection, "01/02" + ) + + @patch("pyard.handlers.mac_handler.db.alleles_to_mac_code") + def test_lookup_mac_given_order(self, mock_alleles_to_mac, mac_handler): + """Test lookup_mac trying given order""" + mock_alleles_to_mac.side_effect = [ + None, + "AB", + ] # First call fails, second succeeds + + result = mac_handler.lookup_mac("A*01:01/A*02:01") + assert result == "A*01:AB" + + @patch("pyard.handlers.mac_handler.db.alleles_to_mac_code") + def test_lookup_mac_sorted_order(self, mock_alleles_to_mac, mac_handler): + """Test lookup_mac trying sorted order""" + # Mock the smart_sort_comparator to return consistent sorting + mac_handler.ard.smart_sort_comparator.return_value = -1 # First arg comes first + # For different antigen groups, skip single antigen optimization + mock_alleles_to_mac.side_effect = [None, "AB"] # First fails, second succeeds + + result = mac_handler.lookup_mac("A*02:01/A*01:01") + assert result == "A*01:AB" # Uses sorted antigen groups, first one is 01 + + @patch("pyard.handlers.mac_handler.db.alleles_to_mac_code") + def test_lookup_mac_no_mac_found(self, mock_alleles_to_mac, mac_handler): + """Test lookup_mac when no MAC code is found""" + mock_alleles_to_mac.return_value = None + + with pytest.raises(InvalidMACError): + mac_handler.lookup_mac("A*01:01/A*01:02") + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_get_alleles_full_allele_expansion(self, mock_mac_to_alleles, mac_handler): + """Test get_alleles with full allele expansion format""" + mock_mac_to_alleles.return_value = ["01:01", "01:02"] + + result = mac_handler.get_alleles("AB", "A*01") + expected = ["A*01:01", "A*01:02"] + assert list(result) == expected + + 
@patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_get_alleles_field_suffix_expansion(self, mock_mac_to_alleles, mac_handler): + """Test get_alleles with field suffix expansion format""" + mock_mac_to_alleles.return_value = ["01", "02"] + + result = mac_handler.get_alleles("AB", "A*01") + expected = ["A*01:01", "A*01:02"] + assert list(result) == expected + + @patch("pyard.handlers.mac_handler.db.mac_code_to_alleles") + def test_get_alleles_filtered_by_database(self, mock_mac_to_alleles, mac_handler): + """Test get_alleles filters results by database presence""" + mock_mac_to_alleles.return_value = ["01:01", "01:02"] + mac_handler.ard._is_allele_in_db.side_effect = lambda x: x == "A*01:01" + + result = mac_handler.get_alleles("AB", "A*01") + assert list(result) == ["A*01:01"] + + def test_is_mac_cache_behavior(self, mac_handler): + """Test that is_mac uses caching""" + with patch( + "pyard.handlers.mac_handler.db.mac_code_to_alleles" + ) as mock_mac_to_alleles: + mock_mac_to_alleles.return_value = ["01:01"] + + # Call twice with same input + mac_handler.is_mac("A*01:AB") + mac_handler.is_mac("A*01:AB") + + # Should only call database once due to caching + assert mock_mac_to_alleles.call_count == 1 diff --git a/tests/unit/handlers/test_serology_handler.py b/tests/unit/handlers/test_serology_handler.py new file mode 100644 index 0000000..995e217 --- /dev/null +++ b/tests/unit/handlers/test_serology_handler.py @@ -0,0 +1,132 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch + +from pyard.handlers.serology_handler import SerologyHandler +from pyard.exceptions import InvalidAlleleError + + +class TestSerologyHandler: + """Test cases for SerologyHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard.db_connection = Mock() + ard.valid_serology_set = {"A1", "A2", "B27", "DR4"} + ard.serology_mapping = Mock() + ard._is_allele_in_db.return_value = True + return ard 
+ + @pytest.fixture + def serology_handler(self, mock_ard): + """Create SerologyHandler instance""" + return SerologyHandler(mock_ard) + + def test_init(self, mock_ard): + """Test SerologyHandler initialization""" + handler = SerologyHandler(mock_ard) + assert handler.ard == mock_ard + + def test_is_serology_valid_serology(self, serology_handler): + """Test is_serology with valid serology""" + result = serology_handler.is_serology("A1") + assert result is True + + def test_is_serology_invalid_serology(self, serology_handler): + """Test is_serology with invalid serology""" + result = serology_handler.is_serology("INVALID") + assert result is False + + def test_is_serology_with_asterisk(self, serology_handler): + """Test is_serology with molecular format (contains *)""" + result = serology_handler.is_serology("A*01:01") + assert result is False + + def test_is_serology_with_colon(self, serology_handler): + """Test is_serology with molecular format (contains :)""" + result = serology_handler.is_serology("A*01:01") + assert result is False + + @patch("pyard.handlers.serology_handler.db.serology_to_alleles") + def test_get_alleles_from_serology( + self, mock_serology_to_alleles, serology_handler + ): + """Test get_alleles_from_serology""" + mock_serology_to_alleles.return_value = ["A*01:01", "A*01:02", "A*01:03"] + + result = serology_handler.get_alleles_from_serology("A1") + expected = {"A*01:01", "A*01:02", "A*01:03"} + assert result == expected + mock_serology_to_alleles.assert_called_once_with( + serology_handler.ard.db_connection, "A1" + ) + + @patch("pyard.handlers.serology_handler.db.serology_to_alleles") + def test_get_alleles_from_serology_filtered( + self, mock_serology_to_alleles, serology_handler + ): + """Test get_alleles_from_serology with database filtering""" + mock_serology_to_alleles.return_value = ["A*01:01", "A*01:02", "A*01:03"] + serology_handler.ard._is_allele_in_db.side_effect = lambda x: x != "A*01:02" + + result = 
serology_handler.get_alleles_from_serology("A1") + expected = {"A*01:01", "A*01:03"} + assert result == expected + + def test_find_broad_splits(self, serology_handler): + """Test find_broad_splits delegates to serology_mapping""" + serology_handler.ard.serology_mapping.find_splits.return_value = ( + "A9", + ["A23", "A24"], + ) + + result = serology_handler.find_broad_splits("A23") + assert result == ("A9", ["A23", "A24"]) + serology_handler.ard.serology_mapping.find_splits.assert_called_once_with("A23") + + def test_find_associated_antigen(self, serology_handler): + """Test find_associated_antigen delegates to serology_mapping""" + serology_handler.ard.serology_mapping.find_associated_antigen.return_value = ( + "A1" + ) + + result = serology_handler.find_associated_antigen("A1") + assert result == "A1" + serology_handler.ard.serology_mapping.find_associated_antigen.assert_called_once_with( + "A1" + ) + + @patch("pyard.handlers.serology_handler.db.find_xx_for_serology") + def test_find_xx_from_serology_valid(self, mock_find_xx, serology_handler): + """Test find_xx_from_serology with valid serology""" + mock_find_xx.return_value = "A*01:XX" + + result = serology_handler.find_xx_from_serology("A1") + assert result == "A*01:XX" + mock_find_xx.assert_called_once_with(serology_handler.ard.db_connection, "A1") + + def test_find_xx_from_serology_invalid(self, serology_handler): + """Test find_xx_from_serology with invalid serology""" + with patch.object(serology_handler, "is_serology", return_value=False): + with pytest.raises(InvalidAlleleError): + serology_handler.find_xx_from_serology("INVALID") + + @pytest.mark.parametrize( + "serology,expected", + [ + ("A1", True), + ("A2", True), + ("B27", True), + ("DR4", True), + ("INVALID", False), + ("A*01:01", False), + ("A1:01", False), + ], + ) + def test_is_serology_various_inputs(self, serology_handler, serology, expected): + """Test is_serology with various input formats""" + result = serology_handler.is_serology(serology) 
+ assert result == expected diff --git a/tests/unit/handlers/test_shortnull_handler.py b/tests/unit/handlers/test_shortnull_handler.py new file mode 100644 index 0000000..4acf61c --- /dev/null +++ b/tests/unit/handlers/test_shortnull_handler.py @@ -0,0 +1,124 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock + +from pyard.handlers.shortnull_handler import ShortNullHandler + + +class TestShortNullHandler: + """Test cases for ShortNullHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard._config = {"reduce_shortnull": True} + ard.shortnulls = {"A*01:01N", "B*07:02N"} + ard.is_mac.return_value = False + return ard + + @pytest.fixture + def shortnull_handler(self, mock_ard): + """Create ShortNullHandler instance""" + return ShortNullHandler(mock_ard) + + def test_init(self, mock_ard): + """Test ShortNullHandler initialization""" + handler = ShortNullHandler(mock_ard) + assert handler.ard == mock_ard + + def test_is_shortnull_valid_with_config_enabled(self, shortnull_handler): + """Test is_shortnull with valid short null and config enabled""" + result = shortnull_handler.is_shortnull("A*01:01N") + assert result is True + + def test_is_shortnull_valid_with_config_disabled(self, mock_ard): + """Test is_shortnull with valid short null but config disabled""" + mock_ard._config["reduce_shortnull"] = False + handler = ShortNullHandler(mock_ard) + + result = handler.is_shortnull("A*01:01N") + assert result is False + + def test_is_shortnull_invalid_allele(self, shortnull_handler): + """Test is_shortnull with allele not in shortnulls set""" + result = shortnull_handler.is_shortnull("A*02:01N") + assert result is False + + def test_is_shortnull_non_null_allele(self, shortnull_handler): + """Test is_shortnull with non-null allele""" + result = shortnull_handler.is_shortnull("A*01:01") + assert result is False + + def test_is_null_valid_null_allele(self, shortnull_handler): + """Test is_null with 
valid null allele (ends with N)""" + result = shortnull_handler.is_null("A*01:01N") + assert result is True + + def test_is_null_non_null_allele(self, shortnull_handler): + """Test is_null with non-null allele""" + result = shortnull_handler.is_null("A*01:01") + assert result is False + + def test_is_null_mac_code_ending_with_n(self, shortnull_handler): + """Test is_null with MAC code ending with N (should return False)""" + shortnull_handler.ard.is_mac.return_value = True + + result = shortnull_handler.is_null("A*01:AN") + assert result is False + + def test_is_null_allele_ending_with_n_not_mac(self, shortnull_handler): + """Test is_null with allele ending with N that is not a MAC code""" + shortnull_handler.ard.is_mac.return_value = False + + result = shortnull_handler.is_null("A*01:01N") + assert result is True + + @pytest.mark.parametrize( + "allele,in_shortnulls,config_enabled,expected", + [ + ("A*01:01N", True, True, True), + ("A*01:01N", True, False, False), + ("A*01:01N", False, True, False), + ("A*01:01N", False, False, False), + ( + "A*01:01", + True, + True, + True, + ), # If allele is in shortnulls set and config enabled, returns True + ("A*01:01", False, True, False), + ], + ) + def test_is_shortnull_combinations( + self, mock_ard, allele, in_shortnulls, config_enabled, expected + ): + """Test is_shortnull with various combinations of conditions""" + mock_ard._config["reduce_shortnull"] = config_enabled + mock_ard.shortnulls = {allele} if in_shortnulls else set() + handler = ShortNullHandler(mock_ard) + + result = handler.is_shortnull(allele) + assert result == expected + + @pytest.mark.parametrize( + "allele,ends_with_n,is_mac_code,expected", + [ + ("A*01:01N", True, False, True), + ("A*01:01N", True, True, False), + ("A*01:01", False, False, False), + ("A*01:01", False, True, False), + ("A*01:AN", True, True, False), + ("A*01:AN", True, False, True), + ], + ) + def test_is_null_combinations( + self, mock_ard, allele, ends_with_n, is_mac_code, 
expected + ): + """Test is_null with various combinations of conditions""" + mock_ard.is_mac.return_value = is_mac_code + handler = ShortNullHandler(mock_ard) + + result = handler.is_null(allele) + assert result == expected diff --git a/tests/unit/handlers/test_v2_handler.py b/tests/unit/handlers/test_v2_handler.py new file mode 100644 index 0000000..1c81a17 --- /dev/null +++ b/tests/unit/handlers/test_v2_handler.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock, patch + +from pyard.handlers.v2_handler import V2Handler + + +class TestV2Handler: + """Test cases for V2Handler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard._config = {"reduce_v2": True} + ard.db_connection = Mock() + ard.is_mac.return_value = False + ard._is_allele_in_db.return_value = True + return ard + + @pytest.fixture + def v2_handler(self, mock_ard): + """Create V2Handler instance""" + return V2Handler(mock_ard) + + def test_init(self, mock_ard): + """Test V2Handler initialization""" + handler = V2Handler(mock_ard) + assert handler.ard == mock_ard + + @patch("pyard.handlers.v2_handler.db.v2_to_v3_allele") + def test_is_v2_valid_v2_format(self, mock_v2_to_v3, v2_handler): + """Test is_v2 with valid V2 format allele""" + mock_v2_to_v3.return_value = "A*01:01" + + result = v2_handler.is_v2("A*0101") + assert result is True + + def test_is_v2_config_disabled(self, mock_ard): + """Test is_v2 with V2 reduction disabled""" + mock_ard._config["reduce_v2"] = False + handler = V2Handler(mock_ard) + + result = handler.is_v2("A*0101") + assert result is False + + def test_is_v2_no_asterisk(self, v2_handler): + """Test is_v2 with allele without asterisk""" + result = v2_handler.is_v2("A0101") + assert result is False + + def test_is_v2_has_colon(self, v2_handler): + """Test is_v2 with allele containing colon (V3 format)""" + result = v2_handler.is_v2("A*01:01") + assert result is False + + 
@pytest.mark.parametrize("locus", ["MICA", "MICB", "HFE"]) + def test_is_v2_excluded_loci(self, v2_handler, locus): + """Test is_v2 with excluded loci""" + result = v2_handler.is_v2(f"{locus}*001") + assert result is False + + @patch("pyard.handlers.v2_handler.db.v2_to_v3_allele") + def test_is_v2_no_conversion_available(self, mock_v2_to_v3, v2_handler): + """Test is_v2 when no V3 conversion is available""" + mock_v2_to_v3.return_value = "A*0101" # Same as input, no conversion + + result = v2_handler.is_v2("A*0101") + assert result is False + + @patch("pyard.handlers.v2_handler.db.v2_to_v3_allele") + def test_is_v2_mac_code_result(self, mock_v2_to_v3, v2_handler): + """Test is_v2 when conversion results in MAC code""" + mock_v2_to_v3.return_value = "A*01:AB" + v2_handler.ard.is_mac.return_value = True + + result = v2_handler.is_v2("A*0101") + assert result is True + + @patch("pyard.handlers.v2_handler.db.v2_to_v3_allele") + def test_map_v2_to_v3_database_mapping(self, mock_v2_to_v3, v2_handler): + """Test map_v2_to_v3 with database mapping available""" + mock_v2_to_v3.return_value = "A*01:01" + + result = v2_handler.map_v2_to_v3("A*0101") + assert result == "A*01:01" + mock_v2_to_v3.assert_called_once_with(v2_handler.ard.db_connection, "A*0101") + + @patch("pyard.handlers.v2_handler.db.v2_to_v3_allele") + def test_map_v2_to_v3_heuristic_fallback(self, mock_v2_to_v3, v2_handler): + """Test map_v2_to_v3 falls back to heuristic when no database mapping""" + mock_v2_to_v3.return_value = None + + with patch.object( + v2_handler, "_predict_v3", return_value="A*01:01" + ) as mock_predict: + result = v2_handler.map_v2_to_v3("A*0101") + assert result == "A*01:01" + mock_predict.assert_called_once_with("A*0101") + + def test_predict_v3_single_digit(self, v2_handler): + """Test _predict_v3 with single digit allele (should return unchanged)""" + result = v2_handler._predict_v3("A*1") + assert result == "A*1" + + def test_predict_v3_two_digits(self, v2_handler): + """Test 
_predict_v3 with two digits""" + result = v2_handler._predict_v3("A*01") + assert result == "A*01" + + def test_predict_v3_two_digits_with_suffix(self, v2_handler): + """Test _predict_v3 with two digits and non-digit suffix""" + result = v2_handler._predict_v3("A*01N") + assert result == "A*01:N" + + def test_predict_v3_four_digits_even(self, v2_handler): + """Test _predict_v3 with four digits (even number)""" + result = v2_handler._predict_v3("A*0101") + assert result == "A*01:01" + + def test_predict_v3_five_digits_odd(self, v2_handler): + """Test _predict_v3 with five digits (odd number)""" + result = v2_handler._predict_v3("A*01011") + assert result == "A*01:011" + + def test_predict_v3_dp_locus_five_digits(self, v2_handler): + """Test _predict_v3 with DP locus and five digits (special case)""" + result = v2_handler._predict_v3("DPA1*01011") + assert result == "DPA1*010:11" + + def test_predict_v3_six_digits_even(self, v2_handler): + """Test _predict_v3 with six digits (even number)""" + result = v2_handler._predict_v3("A*010101") + assert result == "A*01:01:01" + + def test_predict_v3_with_suffix(self, v2_handler): + """Test _predict_v3 with digits and suffix""" + result = v2_handler._predict_v3("A*0101N") + assert result == "A*01:01N" + + def test_predict_v3_no_digits(self, v2_handler): + """Test _predict_v3 with no digit pattern (should return unchanged)""" + result = v2_handler._predict_v3("A*ABC") + assert result == "A*ABC" + + def test_combine_with_colon_four_digits(self, v2_handler): + """Test _combine_with_colon with four digits""" + result = v2_handler._combine_with_colon("0101") + assert result == "01:01" + + def test_combine_with_colon_six_digits(self, v2_handler): + """Test _combine_with_colon with six digits""" + result = v2_handler._combine_with_colon("010101") + assert result == "01:01:01" + + def test_combine_with_colon_eight_digits(self, v2_handler): + """Test _combine_with_colon with eight digits""" + result = 
v2_handler._combine_with_colon("01010101") + assert result == "01:01:01:01" + + @pytest.mark.parametrize( + "v2_allele,expected_v3", + [ + ("A*01", "A*01"), + ("A*0101", "A*01:01"), + ("A*010101", "A*01:01:01"), + ("A*01010101", "A*01:01:01:01"), + ("A*0101N", "A*01:01N"), + ("A*01011", "A*01:011"), + ("DPA1*01011", "DPA1*010:11"), + ("A*1", "A*1"), + ], + ) + def test_predict_v3_various_patterns(self, v2_handler, v2_allele, expected_v3): + """Test _predict_v3 with various V2 patterns""" + result = v2_handler._predict_v3(v2_allele) + assert result == expected_v3 diff --git a/tests/unit/handlers/test_xx_handler.py b/tests/unit/handlers/test_xx_handler.py new file mode 100644 index 0000000..3d8c4bf --- /dev/null +++ b/tests/unit/handlers/test_xx_handler.py @@ -0,0 +1,111 @@ +# -*- coding: utf-8 -*- + +import pytest +from unittest.mock import Mock + +from pyard.handlers.xx_handler import XXHandler + + +class TestXXHandler: + """Test cases for XXHandler class""" + + @pytest.fixture + def mock_ard(self): + """Create mock ARD instance""" + ard = Mock() + ard.code_mappings = Mock() + ard.code_mappings.xx_codes = {"A*01", "A*02", "B*27", "B*15"} + return ard + + @pytest.fixture + def xx_handler(self, mock_ard): + """Create XXHandler instance""" + return XXHandler(mock_ard) + + def test_init(self, mock_ard): + """Test XXHandler initialization""" + handler = XXHandler(mock_ard) + assert handler.ard == mock_ard + + def test_is_xx_valid_xx_code(self, xx_handler): + """Test is_xx with valid XX code""" + result = xx_handler.is_xx("A*01:XX") + assert result is True + + def test_is_xx_invalid_code_suffix(self, xx_handler): + """Test is_xx with invalid code suffix (not XX)""" + result = xx_handler.is_xx("A*01:AB") + assert result is False + + def test_is_xx_invalid_locus_antigen(self, xx_handler): + """Test is_xx with locus*antigen not in mappings""" + result = xx_handler.is_xx("C*03:XX") + assert result is False + + def test_is_xx_no_colon(self, xx_handler): + """Test is_xx with 
string without colon""" + result = xx_handler.is_xx("A01XX") + assert result is False + + def test_is_xx_with_provided_components(self, xx_handler): + """Test is_xx with pre-parsed components""" + result = xx_handler.is_xx("", loc_antigen="A*01", code="XX") + assert result is True + + def test_is_xx_with_provided_components_invalid_code(self, xx_handler): + """Test is_xx with pre-parsed components but invalid code""" + result = xx_handler.is_xx("", loc_antigen="A*01", code="AB") + assert result is False + + def test_is_xx_with_provided_components_invalid_locus(self, xx_handler): + """Test is_xx with pre-parsed components but invalid locus*antigen""" + result = xx_handler.is_xx("", loc_antigen="C*03", code="XX") + assert result is False + + def test_is_xx_malformed_string_too_many_colons(self, xx_handler): + """Test is_xx with malformed string (too many colons)""" + result = xx_handler.is_xx("A*01:02:XX") + assert result is False + + def test_is_xx_empty_string(self, xx_handler): + """Test is_xx with empty string""" + result = xx_handler.is_xx("") + assert result is False + + @pytest.mark.parametrize( + "glstring,expected", + [ + ("A*01:XX", True), + ("A*02:XX", True), + ("B*27:XX", True), + ("B*15:XX", True), + ("A*01:AB", False), + ("A*01:01", False), + ("C*03:XX", False), + ("A01XX", False), + ("A*01XX", False), + ("", False), + ], + ) + def test_is_xx_various_inputs(self, xx_handler, glstring, expected): + """Test is_xx with various input formats""" + result = xx_handler.is_xx(glstring) + assert result == expected + + def test_is_xx_case_sensitivity(self, xx_handler): + """Test is_xx is case sensitive for XX code""" + result_upper = xx_handler.is_xx("A*01:XX") + result_lower = xx_handler.is_xx("A*01:xx") + + assert result_upper is True + assert result_lower is False + + def test_is_xx_with_partial_components_none_loc_antigen(self, xx_handler): + """Test is_xx with None loc_antigen but valid code""" + result = xx_handler.is_xx("A*01:XX", loc_antigen=None, 
code="XX") + assert result is True + + def test_is_xx_with_partial_components_none_code(self, xx_handler): + """Test is_xx with valid loc_antigen but None code""" + result = xx_handler.is_xx("A*01:XX", loc_antigen="A*01", code=None) + assert result is True From 7515e69ea2f8bff7908b722432397bbc03363fc6 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Mon, 3 Nov 2025 16:43:29 -0600 Subject: [PATCH 22/24] Make refactored ARD the default. --- pyard/__init__.py | 2 +- pyard/ard.py | 1032 ++++++--------------------- pyard/ard_refactored.py | 463 ------------ tests/unit/simple_table/__init__.py | 0 4 files changed, 228 insertions(+), 1269 deletions(-) delete mode 100644 pyard/ard_refactored.py create mode 100644 tests/unit/simple_table/__init__.py diff --git a/pyard/__init__.py b/pyard/__init__.py index 5b80192..d32a95a 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -38,7 +38,7 @@ def init( cache_size: int = DEFAULT_CACHE_SIZE, config: dict = None, ): - from .ard_refactored import ARD + from .ard import ARD ard = ARD( imgt_version=imgt_version, diff --git a/pyard/ard.py b/pyard/ard.py index b024dd3..6f43db7 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -1,50 +1,30 @@ # -*- coding: utf-8 -*- -# -# py-ard -# Copyright (c) 2023 Be The Match operated by National Marrow Donor Program. -# All Rights Reserved. -# -# This library is free software; you can redistribute it and/or modify it -# under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation; either version 3 of the License, or (at -# your option) any later version. -# -# This library is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; with out even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -# License for more details. 
-# -# You should have received a copy of the GNU Lesser General Public License -# along with this library; if not, write to the Free Software Foundation, -# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. -# -# > http://www.fsf.org/licensing/licenses/lgpl.html -# > http://www.opensource.org/licenses/lgpl-license.php -# + import functools -import re -import sqlite3 import sys -from collections import Counter -from typing import Iterable, List, Union +from typing import Union, List from . import data_repository as dr from . import db from . import smart_sort from .constants import ( HLA_regex, - VALID_REDUCTION_TYPES, - expression_chars, DEFAULT_CACHE_SIZE, G_GROUP_LOCI, + VALID_REDUCTION_TYPE, + expression_chars, ) -from .exceptions import InvalidAlleleError, InvalidMACError, InvalidTypingError -from .misc import ( - get_n_field_allele, - get_2field_allele, - is_2_field_allele, - validate_reduction_type, +from .exceptions import InvalidMACError, InvalidTypingError +from .handlers import ( + AlleleHandler, + GLStringHandler, + MACHandler, + SerologyHandler, + V2Handler, + XXHandler, + ShortNullHandler, ) +from .misc import get_2field_allele, is_2_field_allele from .serology import SerologyMapping default_config = { @@ -63,14 +43,9 @@ } -# Typing information - - class ARD(object): """ - ARD reduction for HLA - Allows reducing alleles, allele code(MAC), Serology to - G, lg, lgx, W, exon, S and U2 levels. + ARD reduction for HLA - Refactored with specialized handlers """ def __init__( @@ -81,25 +56,33 @@ def __init__( max_cache_size: int = DEFAULT_CACHE_SIZE, config: dict = None, ): - """ - ARD will load valid alleles, xx codes and MAC mappings for the given - version of IMGT database, downloading and generating the database if - not already present. 
- - :param imgt_version: IMGT HLA database version - :param data_dir: directory path to store cached data - :param config: directory of configuration options - """ self._data_dir = data_dir self._config = default_config.copy() if config: self._config.update(config) - # Create a database connection for writing - self.db_connection, _ = db.create_db_connection(data_dir, imgt_version) + # Initialize specialized handlers + self._initialize_handlers() + + # Setup caching + self._setup_caching(max_cache_size) + + # Initialize database and mappings + self._initialize_database(imgt_version, load_mac) + + # Freeze reference data for Python >= 3.9 + self._freeze_reference_data() + + # Reopen connection in read-only mode + self.db_connection, _ = db.create_db_connection(data_dir, imgt_version, ro=True) + + def _initialize_database(self, imgt_version: str, load_mac: bool): + """Initialize database connection and load all mappings""" + self.db_connection, _ = db.create_db_connection(self._data_dir, imgt_version) # Load ARD mappings self.ars_mappings = dr.generate_ard_mapping(self.db_connection, imgt_version) + # Load Alleles and XX Codes ( self.code_mappings, @@ -108,12 +91,12 @@ def __init__( self.db_connection, imgt_version, self.ars_mappings ) - # Generate short nulls from WHO mapping + # Generate short nulls self.shortnulls = dr.generate_short_nulls( self.db_connection, self.code_mappings.who_group ) - # Load Serology mappings (Broad/Splits, Associated, Recognized) + # Load Serology mappings broad_splits_mapping, associated_mapping = dr.generate_broad_splits_mapping( self.db_connection, imgt_version ) @@ -125,679 +108,278 @@ def __init__( ) self.valid_serology_set = SerologyMapping.get_valid_serology_names() - # Load V2 to V3 mappings + # Load other mappings dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version) - # Save IMGT database version dr.set_db_version(self.db_connection, imgt_version) - # Load MAC codes dr.generate_mac_codes(self.db_connection, 
refresh_mac=False, load_mac=load_mac) - # Load CIWD mapping dr.generate_cwd_mapping(self.db_connection) - # Close the current read-write db connection self.db_connection.close() - # Adjust the cache for redux + def _initialize_handlers(self): + """Initialize all specialized handlers""" + self.allele_reducer = AlleleHandler(self) + self.gl_processor = GLStringHandler(self) + self.mac_handler = MACHandler(self) + self.serology_handler = SerologyHandler(self) + self.v2_handler = V2Handler(self) + self.xx_handler = XXHandler(self) + self.shortnull_handler = ShortNullHandler(self) + + def _setup_caching(self, max_cache_size: int): + """Setup caching for performance""" if max_cache_size != DEFAULT_CACHE_SIZE: self._redux_allele = functools.lru_cache(maxsize=max_cache_size)( self._redux_allele ) self.redux = functools.lru_cache(maxsize=max_cache_size)(self.redux) - self.is_mac = functools.lru_cache(maxsize=max_cache_size)(self.is_mac) + self.is_mac = functools.lru_cache(maxsize=max_cache_size)( + self.mac_handler.is_mac + ) self.smart_sort_comparator = functools.lru_cache(maxsize=max_cache_size)( smart_sort.smart_sort_comparator ) else: self.smart_sort_comparator = smart_sort.smart_sort_comparator - # reference data is read-only and can be frozen - # Works only for Python >= 3.9 + @staticmethod + def _freeze_reference_data(): + """Freeze reference data for Python >= 3.9""" if sys.version_info.major == 3 and sys.version_info.minor >= 9: import gc gc.freeze() - # Re-open the connection in read-only mode as we're not updating it anymore - self.db_connection, _ = db.create_db_connection(data_dir, imgt_version, ro=True) - def __del__(self): - """ - Close the db connection, when ARD instance goes away - :return: - """ + """Close database connection when ARD instance is destroyed""" if hasattr(self, "db_connection") and self.db_connection: self.db_connection.close() @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) def _redux_allele( - self, allele: str, redux_type: 
VALID_REDUCTION_TYPES, re_ping=True + self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True ) -> str: - """ - Does ARD reduction with allele and reduction type - - :param allele: An HLA allele. - :type: str - :param redux_type: reduction type. - :type: str - :return: reduced allele - :rtype: str - """ - - # deal with leading 'HLA-' - if HLA_regex.search(allele): - hla, allele_name = allele.split("-") - redux_allele = self._redux_allele(allele_name, redux_type) - if redux_allele: - if "/" in redux_allele: - return "/".join(["HLA-" + ra for ra in redux_allele.split("/")]) - return "HLA-" + redux_allele - else: - return redux_allele - + """Core allele reduction with ping logic""" if not self._config["strict"]: allele = self._get_non_strict_allele(allele) - # g_group maps alleles to their g_group - # note: this includes mappings for shortened version of alleles - # C*12:02:02:01 => C*12:02:01G - # C*12:02:02 => C*12:02:01G - # C*12:02 => C*12:02:01G + # Handle P/G suffixes + if allele.endswith(("P", "G")) and redux_type in ["lg", "lgx", "G"]: + allele = allele[:-1] - if allele.endswith(("P", "G")): - if redux_type in ["lg", "lgx", "G"]: - allele = allele[:-1] - if self._config["ping"] and re_ping: - # ping: alleles that are in P group but not in G groups are defined - # for 2-field alleles. If not already in 2-field form, reduce it to - # 2-field version first then re-reduce it to P group. 
- if redux_type in ("lg", "lgx", "U2"): - if allele in self.ars_mappings.p_not_g: - not_g_allele = self.ars_mappings.p_not_g[allele] - if redux_type == "lg": - return self._add_lg_suffix(not_g_allele) - return not_g_allele - else: - redux_allele = self._redux_allele(allele, redux_type, False) - if redux_allele.endswith("g"): - no_suffix_allele = redux_allele[:-1] - elif redux_allele.endswith("ARS"): - no_suffix_allele = redux_allele[:-3] - else: - no_suffix_allele = redux_allele - if ( - no_suffix_allele == allele - or "/" in no_suffix_allele - or no_suffix_allele in self.ars_mappings.p_not_g.values() - ): - return redux_allele - - twice_redux_allele = self._redux_allele( - no_suffix_allele, redux_type, False - ) - if "/" in twice_redux_allele: - return twice_redux_allele - if self.is_valid_allele(twice_redux_allele): - return twice_redux_allele - - if redux_type == "G" and allele in self.ars_mappings.g_group: - if allele in self.ars_mappings.dup_g: - return self.ars_mappings.dup_g[allele] - else: - return self.ars_mappings.g_group[allele] - elif redux_type == "P" and allele in self.ars_mappings.p_group: - return self.ars_mappings.p_group[allele] - elif redux_type in ["lgx", "lg"]: - if allele in self.ars_mappings.lgx_group: - redux_allele = self.ars_mappings.lgx_group[allele] - else: - # for 'lgx' or 'lg' mode when allele is not in G group, - # return allele with only first 2 field - redux_allele = ":".join(allele.split(":")[0:2]) - if redux_type == "lg": - return self._add_lg_suffix(redux_allele) - return redux_allele - elif redux_type == "W": - # new redux_type which is full WHO expansion - if self._is_who_allele(allele): - return allele - if allele in self.code_mappings.who_group: - return self.redux( - "/".join(self.code_mappings.who_group[allele]), redux_type - ) + # Handle ping mode + if self._config["ping"] and re_ping and redux_type in ("lg", "lgx", "U2"): + if allele in self.ars_mappings.p_not_g: + not_g_allele = self.ars_mappings.p_not_g[allele] + if 
redux_type == "lg": + return self.allele_reducer.add_lg_suffix(not_g_allele) + return not_g_allele else: - return allele - elif redux_type == "exon": - if allele in self.ars_mappings.exon_group: - exon_group_allele = self.ars_mappings.exon_group[allele] - # Check if the 3 field exon allele has a 4 field alleles - # that all have the same expression characters - last_char = allele[-1] - if last_char in expression_chars: - exon_short_null_allele = exon_group_allele + last_char - if self.is_shortnull(exon_short_null_allele): - return exon_short_null_allele - - return exon_group_allele - else: - # Expand to W level and then reduce to exon - w_redux = self.redux(allele, "W") - # If the W redux produces 2 field allele or the same allele, - # don't recurse - if w_redux == allele or len(w_redux.split(":")) == 2: - return allele + redux_allele = self._redux_allele(allele, redux_type, False) + if redux_allele.endswith("g"): + no_suffix_allele = redux_allele[:-1] + elif redux_allele.endswith("ARS"): + no_suffix_allele = redux_allele[:-3] else: - # recurse with the W fields - return self.redux(w_redux, "exon") - elif redux_type == "U2": - allele_fields = allele.split(":") - # If resolved out to second field leave alone - if len(allele_fields) == 2: - return allele - # If the 2 field reduction is unambiguous, reduce to 2 field level - allele_2_fields = get_n_field_allele(allele, 2, preserve_expression=True) - if self._is_allele_in_db(allele_2_fields): - return allele_2_fields - else: - # If ambiguous, reduce to G group level - return self._redux_allele(allele, "lgx") - elif redux_type == "S": - # find serology equivalent in serology_mapping - if is_2_field_allele(allele): - allele = self._redux_allele(allele, "lgx") - serology_mapping = db.find_serology_for_allele( - self.db_connection, allele, "lgx_allele_list" - ) - else: - serology_mapping = db.find_serology_for_allele( - self.db_connection, allele - ) - serology_set = set() - for serology, allele_list in 
serology_mapping.items(): - if allele in allele_list.split("/"): - serology_set.add(serology) - if not serology_set and is_2_field_allele(allele): - for serology, allele_list in serology_mapping.items(): - allele_list_lgx = self.redux(allele_list, "lgx") - if allele in allele_list_lgx.split("/"): - serology_set.add(serology) - return "/".join( - sorted( - serology_set, key=functools.cmp_to_key(self.smart_sort_comparator) - ) - ) - else: - # Make this an explicit lookup to the g_group or p_group table - # for stringent validation - if allele.endswith("P"): - if allele in self.ars_mappings.p_group.values(): - return allele - elif allele.endswith("G"): - if allele in self.ars_mappings.g_group.values(): - return allele + no_suffix_allele = redux_allele - if self._is_allele_in_db(allele): - return allele - else: - raise InvalidAlleleError(f"{allele} is an invalid allele.") + if ( + no_suffix_allele == allele + or "/" in no_suffix_allele + or no_suffix_allele in self.ars_mappings.p_not_g.values() + ): + return redux_allele - def _add_lg_suffix(self, redux_allele): - if "/" in redux_allele: - return "/".join( - [self._add_lg_suffix(allele) for allele in redux_allele.split("/")] - ) - # ARS suffix maybe used instead of g - if self._config["ARS_as_lg"]: - return redux_allele + "ARS" - # lg mode has g appended with lgx reduction - return redux_allele + "g" - - def _get_non_strict_allele(self, allele): - """ - In non-strict mode, if the allele is not valid, - try it with expression characters suffixed - - @param allele: allele that might have non-strict version - @return: non-strict version of the allele if it exists - """ - if not self._is_allele_in_db(allele): - for expr_char in expression_chars: - if self._is_allele_in_db(allele + expr_char): - if self._config["verbose_log"]: - print(f"{allele} is not valid. 
Using {allele}{expr_char}") - allele = allele + expr_char - break - return allele - - def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: - """ - Make a list of sorted unique GL Strings separated by delim. - As the list may itself contains elements that are separated by the - delimiter, split the elements first and then make them unique. - - :param gls: List of gl strings that need to be joined by delim - :param delim: Delimiter of concern - :return: a GL string sorted and made of unique GL - """ - if delim == "~": - # No need to sort - return delim.join(gls) - - if delim == "+": - # No need to make unique. eg. homozygous cases are valid for SLUGs - non_empty_gls = filter(lambda s: s != "", gls) - return delim.join( - sorted( - non_empty_gls, - key=functools.cmp_to_key( - lambda a, b: self.smart_sort_comparator( - a, b, self._config["ignore_allele_with_suffixes"] - ) - ), + twice_redux_allele = self._redux_allele( + no_suffix_allele, redux_type, False ) - ) + if "/" in twice_redux_allele: + return twice_redux_allele + if self.is_valid_allele(twice_redux_allele): + return twice_redux_allele - # generate a unique list over a delimiter - # e.g. [A, A/B] => [ A, B ] for / delimiter - all_gls = [] - for gl in gls: - all_gls += gl.split(delim) - unique_gls = filter(lambda s: s != "", set(all_gls)) - return delim.join( - sorted( - unique_gls, - key=functools.cmp_to_key( - lambda a, b: self.smart_sort_comparator( - a, b, self._config["ignore_allele_with_suffixes"] - ) - ), - ) - ) - - @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPES = "lgx") -> str: - """ - Does ARD reduction with gl string and reduction type - - :param glstring: A GL String - :type: str - :param redux_type: The reduction_type. 
- :type: str - :return: reduced allele - :rtype: str - """ - - validate_reduction_type(redux_type) - - if self._config["strict"]: - self.validate(glstring) - - if "^" in glstring: - return self._sorted_unique_gl( - [self.redux(a, redux_type) for a in glstring.split("^")], "^" - ) + return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) - if "|" in glstring: - return self._sorted_unique_gl( - [self.redux(a, redux_type) for a in glstring.split("|")], "|" - ) - - if "+" in glstring: - return self._sorted_unique_gl( - [self.redux(a, redux_type) for a in glstring.split("+")], "+" - ) - - if "~" in glstring: - return self._sorted_unique_gl( - [self.redux(a, redux_type) for a in glstring.split("~")], "~" - ) - - if "/" in glstring: - return self._sorted_unique_gl( - [self.redux(a, redux_type) for a in glstring.split("/")], "/" - ) - - if self._config["ignore_allele_with_suffixes"]: - _, fields = glstring.split("*") - if fields in self._config["ignore_allele_with_suffixes"]: - return glstring + def _redux_non_glstring( + self, allele: str, glstring: str, redux_type: VALID_REDUCTION_TYPE + ): + if "*" in allele: + locus, fields = allele.split("*") + # Handle ignored allele suffixes + if self._config["ignore_allele_with_suffixes"]: + if fields in self._config["ignore_allele_with_suffixes"]: + return allele + if locus not in G_GROUP_LOCI: + return allele # Handle V2 to V3 mapping - if self.is_v2(glstring): - glstring = self._map_v2_to_v3(glstring) - return self.redux(glstring, redux_type) + if self.v2_handler.is_v2(allele): + allele = self.v2_handler.map_v2_to_v3(allele) + return self.redux(allele, redux_type) # Handle Serology - if self._config["reduce_serology"] and self.is_serology(glstring): - alleles = self._get_alleles_from_serology(glstring) - # If there's corresponding alleles, return / delimited alleles + if self._config["reduce_serology"] and self.serology_handler.is_serology( + allele + ): + alleles = 
self.serology_handler.get_alleles_from_serology(allele) if alleles: return self.redux("/".join(alleles), redux_type) - # If there's no DNA Mapping for a serology, e.g. DPw6, return empty return "" - if ":" in glstring: - loc_allele = glstring.split(":") + # Validate allele format is correct + if ":" in allele: + loc_allele = allele.split(":") + if len(loc_allele) < 2: + raise InvalidTypingError( + f"{glstring} is not a valid V2 or Serology typing." + ) loc_antigen, code = loc_allele[0], loc_allele[1] + # Check for empty fields (like DQA1*01:01:01:G where G is after empty field) + if any(field == "" for field in loc_allele[1:]): + raise InvalidTypingError( + f"{glstring} is not a valid V2 or Serology typing." + ) else: - if "*" in glstring: - locus, _ = glstring.split("*") - if locus not in G_GROUP_LOCI: - return glstring raise InvalidTypingError( f"{glstring} is not a valid V2 or Serology typing." ) # Handle XX codes - if self._config["reduce_XX"]: - is_hla_prefix = HLA_regex.search(loc_antigen) - if is_hla_prefix: - loc_antigen = loc_antigen.split("-")[1] - if code == "XX": - if self.is_XX(glstring, loc_antigen, code): - if is_hla_prefix: - reduced_alleles = self.redux( - "/".join(self.code_mappings.xx_codes[loc_antigen]), - redux_type, - ) - return "/".join( - ["HLA-" + a for a in reduced_alleles.split("/")] - ) - else: - return self.redux( - "/".join(self.code_mappings.xx_codes[loc_antigen]), - redux_type, - ) - else: - raise InvalidTypingError(f"{glstring} is not valid XX code") + if ( + self._config["reduce_XX"] + and code == "XX" + and self.xx_handler.is_xx(allele, loc_antigen, code) + ): + reduced_alleles = self.redux( + "/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type + ) + return reduced_alleles # Handle MAC if self._config["reduce_MAC"] and code.isalpha(): - if self.is_mac(glstring): # Make sure it's a valid MAC - if HLA_regex.search(glstring): - # Remove HLA- prefix - allele_name = glstring.split("-")[1] - loc_antigen, code = 
allele_name.split(":") - alleles = self._get_alleles(code, loc_antigen) - alleles = ["HLA-" + a for a in alleles] - else: - alleles = self._get_alleles(code, loc_antigen) + if self.mac_handler.is_mac(allele): + alleles = self.mac_handler.get_alleles(code, loc_antigen) return self.redux("/".join(alleles), redux_type) else: raise InvalidMACError(f"{glstring} is an invalid MAC.") # Handle short nulls - if self._config["reduce_shortnull"] and self.is_shortnull(glstring): - return self.redux("/".join(self.shortnulls[glstring]), redux_type) + if self._config["reduce_shortnull"] and self.shortnull_handler.is_shortnull( + allele + ): + return self.redux("/".join(self.shortnulls[allele]), redux_type) - return self._redux_allele(glstring, redux_type) + redux_allele = self._redux_allele(allele, redux_type) + return redux_allele - def validate(self, glstring): - """ - Validates GL String - Raise an exception if not valid. + @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) + def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: + """Main redux method using specialized handlers""" + + # Handle GL string delimiters first + processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) + if processed_gl != glstring or self.is_glstring(processed_gl): + return processed_gl + + # Remove HLA- prefix for processing the allele + is_hla_prefix = HLA_regex.search(glstring) + if is_hla_prefix: + allele = glstring.split("-")[1] + else: + allele = glstring - :param glstring: GL String to validate - :return: boolean indicating success - """ - return self._is_valid_gl(glstring) + # Handle non GL string + redux_allele = self._redux_non_glstring(allele, glstring, redux_type) + # Add back 'HLA-' prefix when redux is done if needed + if is_hla_prefix: + if "/" in redux_allele: + return "/".join([f"HLA-{ra}" for ra in redux_allele.split("/")]) + redux_allele = f"HLA-{redux_allele}" + return redux_allele - def is_XX(self, glstring: str, loc_antigen: str = None, 
code: str = None) -> bool: - if loc_antigen is None or code is None: - if ":" in glstring: - loc_allele = glstring.split(":") - loc_antigen, code = loc_allele[0], loc_allele[1] - else: - return False - return code == "XX" and loc_antigen in self.code_mappings.xx_codes + @staticmethod + def is_glstring(gl_string: str) -> bool: + return ( + "/" in gl_string or "+" in gl_string or "^" in gl_string or "~" in gl_string + ) + + # Delegate methods to handlers + def is_mac(self, allele: str) -> bool: + return self.mac_handler.is_mac(allele) def is_serology(self, allele: str) -> bool: - """ + return self.serology_handler.is_serology(allele) - Strict validation of serology: - Does not have * or : in serology. - If it exists in the database, it's serology otherwise it's not serology. + def is_v2(self, allele: str) -> bool: + return self.v2_handler.is_v2(allele) - A serology has the locus name (first 2 letters for DRB1, DQB1) - of the allele followed by numerical antigen. - Cw is the serological designation for HLA-C + def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: + return self.xx_handler.is_xx(glstring, loc_antigen, code) - :param allele: The allele to test for serology - :return: True if serology - """ - if "*" in allele or ":" in allele: - return False + def is_shortnull(self, allele: str) -> bool: + return self.shortnull_handler.is_shortnull(allele) - return allele in self.valid_serology_set + def is_null(self, allele: str) -> bool: + return self.shortnull_handler.is_null(allele) - @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def is_mac(self, allele: str) -> bool: - """ - MAC has non-digit characters after the : character. + def expand_mac(self, mac_code: str) -> str: + return self.mac_handler.expand_mac(mac_code) - Strict validation of MAC. - The allele is a MAC code if it exists in the database. - Not all strings are MACs e.g. 
":THISISNOTAMAC" + def lookup_mac(self, allelelist_gl: str) -> str: + return self.mac_handler.lookup_mac(allelelist_gl) - :param allele: test if it is a MAC code - :return: True if MAC - """ - if ":" in allele: - allele_split = allele.split(":") - if len(allele_split) == 2: # MACs have only single : - locus_antigen, code = allele_split - if code.isalpha(): - try: - alleles = db.mac_code_to_alleles(self.db_connection, code) - if alleles: - if any(map(lambda a: ":" in a, alleles)): - # allele specific antigen codes have ':' in the MAC mapping - # e.g. CFWRN -> 15:01/15:98/15:157/15:202/ - # 15:239/15:280/15:340/35:43/35:67/35:79/35:102/35:118/35:185/51:220 - # Extract the antigens from the mapped alleles - antigen_groups = map(lambda a: a.split(":")[0], alleles) - # Rule 1: The 1st field with the most allele designations in the request is - # the 1st field of the allele code designation - # Rule 2: If there is a tie in the number of alleles designations sharing the 1st field, - # the 1st field with the lowest numeric value is selected. - antigen_counts = Counter(antigen_groups) - # Create a table of antigen to it's counts - # '15': 7 - # '35': 6 - # '51': 1 - # Valid antigen is the first most common one. - # As it's presorted in db, also satisfies Rule 2. - valid_antigen = antigen_counts.most_common(1).pop()[0] - # Get antigen value 15 from 'DRB1*15' - provided_antigen = locus_antigen.split("*").pop() - # The MAC is only valid if the given antigen satisfies the antigen matching Rule 1 and 2 - return provided_antigen == valid_antigen - # Valid when antigen group codes - return True - except sqlite3.OperationalError as e: - print("Error: ", e) - return False + def find_broad_splits(self, allele: str) -> tuple: + return self.serology_handler.find_broad_splits(allele) - def is_v2(self, allele: str) -> bool: - """ - Version 2 of the nomenclature is a single field. - It does not have any ':' field separator. 
- Eg: A*0104 - Exceptions: - Not all strings with "*" but not ":" are v2 nomenclature - DRB3*NNNN is not v2 allele - Stricter Check: - if the conversion of v2 to v3 is the same, then - it's not a V2 typing - Set 'reduce_v2' option to False to skip the check for V2. - - :param allele: Possible allele - :return: Is the allele in V2 nomenclature - """ - matches_v2_format = ( - self._config["reduce_v2"] - and "*" in allele - and ":" not in allele - and allele.split("*")[0] not in ["MICA", "MICB", "HFE"] - ) + def find_associated_antigen(self, serology: str) -> str: + return self.serology_handler.find_associated_antigen(serology) - if matches_v2_format: - v3_format_allele = self._map_v2_to_v3(allele) - if v3_format_allele != allele: - # If the last field of the allele is alpha, check if it's a MAC - if v3_format_allele.split(":").pop().isalpha(): - return self.is_mac(v3_format_allele) - return self._is_allele_in_db(v3_format_allele) - - return False - - def _is_who_allele(self, allele): - """ - Test if allele is a WHO allele in the current imgt database - :param allele: Allele to test - :return: bool to indicate if allele is valid - """ - return allele in self.allele_group.who_alleles + def find_xx_from_serology(self, serology: str) -> str: + return self.serology_handler.find_xx_from_serology(serology) - def _is_allele_in_db(self, allele): - """ - Test if allele is valid in the current imgt database - :param allele: Allele to test - :return: bool to indicate if allele is valid - """ - return allele in self.allele_group.alleles + def v2_to_v3(self, v2_allele: str) -> str: + return self.v2_handler.map_v2_to_v3(v2_allele) - def is_shortnull(self, allele): - """ - Test if allele is valid in list of shortnull alleles and - the reduce_shortnull is configured to True (WMDA rules) - :param allele: Allele to test - :return: bool to indicate if allele is valid - """ - return allele in self.shortnulls and self._config["reduce_shortnull"] - - def is_null(self, allele): - """ - 
Check if allele is a null allele. - - @param allele: Allele to check for null - @return: boolean indicating whether allele is null or not - """ - return allele.endswith("N") and not self.is_mac(allele) - - def is_exp_allele(self, allele): - """ - Test if allele is valid as a shortening (WHO rules) - :param allele: Allele to test - :return: bool to indicate if allele is valid - """ - return allele in self.allele_group.exp_alleles + # Keep existing methods that don't fit into handlers + def validate(self, glstring: str) -> bool: + return self.gl_processor.validate_gl_string(glstring) - def find_broad_splits(self, allele) -> tuple: - return self.serology_mapping.find_splits(allele) - - def find_associated_antigen(self, serology) -> str: - return self.serology_mapping.find_associated_antigen(serology) - - @functools.lru_cache() - def find_xx_from_serology(self, serology): - if self.is_serology(serology): - return db.find_xx_for_serology(self.db_connection, serology) - raise InvalidAlleleError(f"{serology} is not a valid serology") - - def _get_alleles(self, code, locus_antigen) -> Iterable[str]: - """ - Look up allele code in database and generate alleles - :param code: allele code to look up - :param locus_antigen: locus name for alleles - :return: valid alleles corresponding to allele code - """ - alleles = db.mac_code_to_alleles(self.db_connection, code) - - # It's an allelic expansion if any of the alleles have a `:` - # else it's a group expansion - is_allelic_expansion = any([":" in allele for allele in alleles]) - if is_allelic_expansion: - locus = locus_antigen.split("*")[0] # Just keep the locus name - alleles = [f"{locus}*{a}" for a in alleles] - else: - alleles = [f"{locus_antigen}:{a}" for a in alleles] + def _get_non_strict_allele(self, allele: str) -> str: + """Handle non-strict allele validation""" + if not self._is_allele_in_db(allele): + for expr_char in expression_chars: + if self._is_allele_in_db(allele + expr_char): + if self._config["verbose_log"]: 
+ print(f"{allele} is not valid. Using {allele}{expr_char}") + allele = allele + expr_char + break + return allele - return list(filter(self._is_allele_in_db, alleles)) + def _is_who_allele(self, allele: str) -> bool: + return allele in self.allele_group.who_alleles - def _get_alleles_from_serology(self, serology) -> Iterable[str]: - alleles = db.serology_to_alleles(self.db_connection, serology) - return set(filter(self._is_allele_in_db, alleles)) + def _is_allele_in_db(self, allele: str) -> bool: + return allele in self.allele_group.alleles - @staticmethod - def _combine_with_colon(digits_field): - num_of_digits = len(digits_field) - return ":".join(digits_field[i : i + 2] for i in range(0, num_of_digits, 2)) - - def _predict_v3(self, v2_allele: str) -> str: - """ - Use heuristic to predict V3 from V2 - - :param v2_allele: Allele in V2 format - :return: V3 format of V2 allele - """ - # Separate out the locus and the allele name part - locus, allele_name = v2_allele.split("*") - # Separate out the numeric and non-numeric components - components = re.findall(r"^(\d+)(.*)", allele_name) - if not components: - return v2_allele - digits_field, non_digits_field = components.pop() - # final_allele is the result of the transformation - final_allele = digits_field - num_of_digits = len(digits_field) - if num_of_digits == 1: - return v2_allele - if num_of_digits > 2: - if ( - locus.startswith("DP") and num_of_digits == 5 - ): # covers DPs with 5 digits - final_allele = ( - digits_field[:3] + ":" + (digits_field[3:]) + non_digits_field - ) - elif num_of_digits % 2 == 0: # covers digits with 2, 4, 6, 8 - final_allele = self._combine_with_colon(digits_field) + non_digits_field - else: - final_allele = ( - digits_field[:2] + ":" + (digits_field[2:]) + non_digits_field - ) + def is_valid_allele(self, allele: str) -> bool: + if allele.endswith(("P", "G")): + allele = allele[:-1] + if "*" in allele: + _, fields = allele.split("*") + if not all(map(str.isalnum, fields.split(":"))): 
+ return False + if self._is_allele_in_db(allele): + return True else: - if non_digits_field: - final_allele = digits_field + ":" + non_digits_field - return locus + "*" + final_allele - - def _map_v2_to_v3(self, v2_allele): - """ - Get V3 version of V2 versioned allele - :param v2_allele: V2 versioned allele - :return: V3 versioned allele - """ - # Check if it's in the exception case mapping - v3_allele = db.v2_to_v3_allele(self.db_connection, v2_allele) - if not v3_allele: - # Try and predict V3 - v3_allele = self._predict_v3(v2_allele) - return v3_allele + allele = get_2field_allele(allele) + return self._is_allele_in_db(allele) def _is_valid(self, allele: str) -> bool: - """ - Determines validity of an allele in various forms - - :param allele: An HLA allele. - :type: str - :return: allele or empty - :rtype: bool - """ + """Validate allele in various forms""" if allele == "" or allele.endswith("*"): return False - # validate allele without the 'HLA-' prefix if HLA_regex.search(allele): - # remove 'HLA-' prefix allele = allele[4:] if "*" in allele: @@ -824,129 +406,12 @@ def _is_valid(self, allele: str) -> bool: return True - def is_valid_allele(self, allele): - """ - Is the given allele valid? 
- - @param allele: - @return: - """ - # Alleles ending with P or G are valid_alleles - if allele.endswith(("P", "G")): - # remove the last character - allele = allele[:-1] - # validate format: there are no empty fields eg, 2 :: together - if "*" in allele: - _, fields = allele.split("*") - if not all(map(str.isalnum, fields.split(":"))): - return False - # The allele is valid as whole or as a 2 field version - if self._is_allele_in_db(allele): - return True - else: - allele = get_2field_allele(allele) - return self._is_allele_in_db(allele) - - def _is_valid_gl(self, glstring: str) -> bool: - """ - Determines validity of glstring - - :param glstring - :type: str - :return: result - :rtype: bool - """ - - if "^" in glstring: - return all(map(self._is_valid_gl, glstring.split("^"))) - if "|" in glstring: - return all(map(self._is_valid_gl, glstring.split("|"))) - if "+" in glstring: - return all(map(self._is_valid_gl, glstring.split("+"))) - if "~" in glstring: - return all(map(self._is_valid_gl, glstring.split("~"))) - if "/" in glstring: - return all(map(self._is_valid_gl, glstring.split("/"))) - - # what falls through here is an allele - is_valid_allele = self._is_valid(glstring) - if not is_valid_allele: - raise InvalidAlleleError(f"{glstring} is not a valid Allele") - return is_valid_allele - - def expand_mac(self, mac_code: str): - """ - Expands MAC code into its - - :param mac_code: A MAC code - :type: str - :return: GL String of expanded alleles - :rtype: str - """ - if self.is_mac(mac_code): # Validate MAC first - locus_antigen, code = mac_code.split(":") - if HLA_regex.search(mac_code): - locus_antigen = locus_antigen.split("-")[1] # Remove HLA- prefix - return "/".join( - ["HLA-" + a for a in self._get_alleles(code, locus_antigen)] - ) - else: - return "/".join(self._get_alleles(code, locus_antigen)) - - raise InvalidMACError(f"{mac_code} is an invalid MAC.") - - @functools.lru_cache() - def lookup_mac(self, allelelist_gl: str): - """ - Finds a MAC code 
corresponding to - - :param allelelist_gl: Allelelist GL String - :type: str - :return: MAC code - :rtype: str - """ - alleles = allelelist_gl.split("/") - allele_fields = [allele.split("*")[1] for allele in alleles] - antigen_groups = sorted({allele.split(":")[0] for allele in allele_fields}) - if len(antigen_groups) == 1: - mac_expansion = "/".join( - sorted({allele.split(":")[1] for allele in allele_fields}) - ) - # See if the 2nd field lists is in the database - mac_code = db.alleles_to_mac_code(self.db_connection, mac_expansion) - if mac_code: - locus = allelelist_gl.split("*")[0] - return f"{locus}*{antigen_groups[0]}:{mac_code}" - - # Try the given list order first with first_field:second_field combinations - mac_expansion = "/".join(allele_fields) - mac_code = db.alleles_to_mac_code(self.db_connection, mac_expansion) - if mac_code: - locus = allelelist_gl.split("*")[0] - return f"{locus}*{antigen_groups[0]}:{mac_code}" - - # Try the sorted list of first_field:second_field combinations - mac_expansion = "/".join( - sorted(allele_fields, key=functools.cmp_to_key(self.smart_sort_comparator)) - ) - mac_code = db.alleles_to_mac_code(self.db_connection, mac_expansion) - if mac_code: - locus = allelelist_gl.split("*")[0] - return f"{locus}*{antigen_groups[0]}:{mac_code}" - - raise InvalidMACError(f"{allelelist_gl} does not have a MAC.") - - def cwd_redux(self, allele_list_gl): - """ - Reduce alleles from allele_list_gl to a list that - consists of only ones appearing in CWD 2 - - If it's a MAC, use the expanded list to compare with CWD list - if it's an allele(may have null), use the allele. 
+ # Keep remaining methods unchanged + def is_exp_allele(self, allele: str) -> bool: + return allele in self.allele_group.exp_alleles - @param allele_list_gl: allele, allele list or MAC - @return: CWD alleles as an allele list GL String - """ + def cwd_redux(self, allele_list_gl: str) -> str: + """CWD reduction using existing logic""" alleles = [] for allele in allele_list_gl.split("/"): if self.is_mac(allele): @@ -956,86 +421,43 @@ def cwd_redux(self, allele_list_gl): else: alleles.extend(self.redux(allele, "lgx").split("/")) - # get the CWD for the locus and find the containing CWD alleles locus = allele_list_gl.split("*")[0] if HLA_regex.search(locus): locus = locus.split("-")[1] ciwd_for_locus = db.load_cwd(self.db_connection, locus) alleles_in_ciwd = ciwd_for_locus.intersection(alleles) - sorted_alleles = sorted(alleles_in_ciwd) - # TODO: doesn't sort when compared with sorting with null - # E.g. B*15:01/B*15:01N - # sorted_alleles = sorted( - # alleles_in_ciwd, key=functools.cmp_to_key(self.smart_sort_comparator) - # ) - return "/".join(sorted_alleles) - - def v2_to_v3(self, v2_allele) -> str: - """ - Convert Version 2 Allele Name to Version 3 Allele Name - - :param v2_allele: Version 2 Allele Name - :return: Version 3 Allele Name - """ - if self.is_v2(v2_allele): - return self._map_v2_to_v3(v2_allele) - return v2_allele + return "/".join(sorted(alleles_in_ciwd)) def refresh_mac_codes(self) -> None: - """ - Refreshes MAC code for the current IMGT db version. - :return: None - """ dr.generate_mac_codes(self.db_connection, refresh_mac=True) def get_db_version(self) -> str: - """ - Get the IMGT DB Version Number - @return: - """ return dr.get_db_version(self.db_connection) def similar_alleles(self, prefix: str) -> Union[List, None]: - """ - Given a prefix, find similar alleles or MACs starting with the prefix. - The minimum prefix needs to specify is the locus with a `*`, - and a first field of the allele/MAC. 
- - @param prefix: The prefix for allele or MAC - @return: List of alleles/MACs that start with the prefix - """ - - if "*" not in prefix: # Only for those that have locus + """Find similar alleles using existing logic""" + if "*" not in prefix: return None locus, fields = prefix.split("*") - # if at least a field is specified after * if fields: - # Will check only for and after 2 fields if len(fields.split(":")) == 2: first_field, mac_prefix = fields.split(":") - if mac_prefix.isalpha(): # Check for MACs + if mac_prefix.isalpha(): similar_mac_names = db.similar_mac(self.db_connection, mac_prefix) if similar_mac_names: locus_prefix = f"{locus}*{first_field}" - # Build all the mac codes with the prefix mac_codes = [ f"{locus_prefix}:{code}" for code in similar_mac_names ] - # show only the valid macs - real_mac_codes = sorted( - filter(lambda mac: self.is_mac(mac), mac_codes) - ) - return real_mac_codes + return sorted(filter(lambda mac: self.is_mac(mac), mac_codes)) - # find similar alleles similar_allele_names = db.similar_alleles(self.db_connection, prefix) if similar_allele_names: - alleles = sorted( + return sorted( similar_allele_names, key=functools.cmp_to_key(smart_sort.smart_sort_comparator), ) - return alleles return None diff --git a/pyard/ard_refactored.py b/pyard/ard_refactored.py deleted file mode 100644 index 6f43db7..0000000 --- a/pyard/ard_refactored.py +++ /dev/null @@ -1,463 +0,0 @@ -# -*- coding: utf-8 -*- - -import functools -import sys -from typing import Union, List - -from . import data_repository as dr -from . import db -from . 
import smart_sort -from .constants import ( - HLA_regex, - DEFAULT_CACHE_SIZE, - G_GROUP_LOCI, - VALID_REDUCTION_TYPE, - expression_chars, -) -from .exceptions import InvalidMACError, InvalidTypingError -from .handlers import ( - AlleleHandler, - GLStringHandler, - MACHandler, - SerologyHandler, - V2Handler, - XXHandler, - ShortNullHandler, -) -from .misc import get_2field_allele, is_2_field_allele -from .serology import SerologyMapping - -default_config = { - "reduce_serology": True, - "reduce_v2": True, - "reduce_3field": True, - "reduce_P": True, - "reduce_XX": True, - "reduce_MAC": True, - "reduce_shortnull": True, - "ping": True, - "verbose_log": False, - "ARS_as_lg": False, - "strict": True, - "ignore_allele_with_suffixes": (), -} - - -class ARD(object): - """ - ARD reduction for HLA - Refactored with specialized handlers - """ - - def __init__( - self, - imgt_version: str = "Latest", - data_dir: str = None, - load_mac: bool = True, - max_cache_size: int = DEFAULT_CACHE_SIZE, - config: dict = None, - ): - self._data_dir = data_dir - self._config = default_config.copy() - if config: - self._config.update(config) - - # Initialize specialized handlers - self._initialize_handlers() - - # Setup caching - self._setup_caching(max_cache_size) - - # Initialize database and mappings - self._initialize_database(imgt_version, load_mac) - - # Freeze reference data for Python >= 3.9 - self._freeze_reference_data() - - # Reopen connection in read-only mode - self.db_connection, _ = db.create_db_connection(data_dir, imgt_version, ro=True) - - def _initialize_database(self, imgt_version: str, load_mac: bool): - """Initialize database connection and load all mappings""" - self.db_connection, _ = db.create_db_connection(self._data_dir, imgt_version) - - # Load ARD mappings - self.ars_mappings = dr.generate_ard_mapping(self.db_connection, imgt_version) - - # Load Alleles and XX Codes - ( - self.code_mappings, - self.allele_group, - ) = dr.generate_alleles_and_xx_codes_and_who( - 
self.db_connection, imgt_version, self.ars_mappings - ) - - # Generate short nulls - self.shortnulls = dr.generate_short_nulls( - self.db_connection, self.code_mappings.who_group - ) - - # Load Serology mappings - broad_splits_mapping, associated_mapping = dr.generate_broad_splits_mapping( - self.db_connection, imgt_version - ) - self.serology_mapping = SerologyMapping( - broad_splits_mapping, associated_mapping - ) - dr.generate_serology_mapping( - self.db_connection, imgt_version, self.serology_mapping, self._redux_allele - ) - self.valid_serology_set = SerologyMapping.get_valid_serology_names() - - # Load other mappings - dr.generate_v2_to_v3_mapping(self.db_connection, imgt_version) - dr.set_db_version(self.db_connection, imgt_version) - dr.generate_mac_codes(self.db_connection, refresh_mac=False, load_mac=load_mac) - dr.generate_cwd_mapping(self.db_connection) - - self.db_connection.close() - - def _initialize_handlers(self): - """Initialize all specialized handlers""" - self.allele_reducer = AlleleHandler(self) - self.gl_processor = GLStringHandler(self) - self.mac_handler = MACHandler(self) - self.serology_handler = SerologyHandler(self) - self.v2_handler = V2Handler(self) - self.xx_handler = XXHandler(self) - self.shortnull_handler = ShortNullHandler(self) - - def _setup_caching(self, max_cache_size: int): - """Setup caching for performance""" - if max_cache_size != DEFAULT_CACHE_SIZE: - self._redux_allele = functools.lru_cache(maxsize=max_cache_size)( - self._redux_allele - ) - self.redux = functools.lru_cache(maxsize=max_cache_size)(self.redux) - self.is_mac = functools.lru_cache(maxsize=max_cache_size)( - self.mac_handler.is_mac - ) - self.smart_sort_comparator = functools.lru_cache(maxsize=max_cache_size)( - smart_sort.smart_sort_comparator - ) - else: - self.smart_sort_comparator = smart_sort.smart_sort_comparator - - @staticmethod - def _freeze_reference_data(): - """Freeze reference data for Python >= 3.9""" - if sys.version_info.major == 3 and 
sys.version_info.minor >= 9: - import gc - - gc.freeze() - - def __del__(self): - """Close database connection when ARD instance is destroyed""" - if hasattr(self, "db_connection") and self.db_connection: - self.db_connection.close() - - @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def _redux_allele( - self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True - ) -> str: - """Core allele reduction with ping logic""" - if not self._config["strict"]: - allele = self._get_non_strict_allele(allele) - - # Handle P/G suffixes - if allele.endswith(("P", "G")) and redux_type in ["lg", "lgx", "G"]: - allele = allele[:-1] - - # Handle ping mode - if self._config["ping"] and re_ping and redux_type in ("lg", "lgx", "U2"): - if allele in self.ars_mappings.p_not_g: - not_g_allele = self.ars_mappings.p_not_g[allele] - if redux_type == "lg": - return self.allele_reducer.add_lg_suffix(not_g_allele) - return not_g_allele - else: - redux_allele = self._redux_allele(allele, redux_type, False) - if redux_allele.endswith("g"): - no_suffix_allele = redux_allele[:-1] - elif redux_allele.endswith("ARS"): - no_suffix_allele = redux_allele[:-3] - else: - no_suffix_allele = redux_allele - - if ( - no_suffix_allele == allele - or "/" in no_suffix_allele - or no_suffix_allele in self.ars_mappings.p_not_g.values() - ): - return redux_allele - - twice_redux_allele = self._redux_allele( - no_suffix_allele, redux_type, False - ) - if "/" in twice_redux_allele: - return twice_redux_allele - if self.is_valid_allele(twice_redux_allele): - return twice_redux_allele - - return self.allele_reducer.reduce_allele(allele, redux_type, re_ping) - - def _redux_non_glstring( - self, allele: str, glstring: str, redux_type: VALID_REDUCTION_TYPE - ): - if "*" in allele: - locus, fields = allele.split("*") - # Handle ignored allele suffixes - if self._config["ignore_allele_with_suffixes"]: - if fields in self._config["ignore_allele_with_suffixes"]: - return allele - if locus not in G_GROUP_LOCI: - 
return allele - - # Handle V2 to V3 mapping - if self.v2_handler.is_v2(allele): - allele = self.v2_handler.map_v2_to_v3(allele) - return self.redux(allele, redux_type) - - # Handle Serology - if self._config["reduce_serology"] and self.serology_handler.is_serology( - allele - ): - alleles = self.serology_handler.get_alleles_from_serology(allele) - if alleles: - return self.redux("/".join(alleles), redux_type) - return "" - - # Validate allele format is correct - if ":" in allele: - loc_allele = allele.split(":") - if len(loc_allele) < 2: - raise InvalidTypingError( - f"{glstring} is not a valid V2 or Serology typing." - ) - loc_antigen, code = loc_allele[0], loc_allele[1] - # Check for empty fields (like DQA1*01:01:01:G where G is after empty field) - if any(field == "" for field in loc_allele[1:]): - raise InvalidTypingError( - f"{glstring} is not a valid V2 or Serology typing." - ) - else: - raise InvalidTypingError( - f"{glstring} is not a valid V2 or Serology typing." - ) - - # Handle XX codes - if ( - self._config["reduce_XX"] - and code == "XX" - and self.xx_handler.is_xx(allele, loc_antigen, code) - ): - reduced_alleles = self.redux( - "/".join(self.code_mappings.xx_codes[loc_antigen]), redux_type - ) - return reduced_alleles - - # Handle MAC - if self._config["reduce_MAC"] and code.isalpha(): - if self.mac_handler.is_mac(allele): - alleles = self.mac_handler.get_alleles(code, loc_antigen) - return self.redux("/".join(alleles), redux_type) - else: - raise InvalidMACError(f"{glstring} is an invalid MAC.") - - # Handle short nulls - if self._config["reduce_shortnull"] and self.shortnull_handler.is_shortnull( - allele - ): - return self.redux("/".join(self.shortnulls[allele]), redux_type) - - redux_allele = self._redux_allele(allele, redux_type) - return redux_allele - - @functools.lru_cache(maxsize=DEFAULT_CACHE_SIZE) - def redux(self, glstring: str, redux_type: VALID_REDUCTION_TYPE = "lgx") -> str: - """Main redux method using specialized handlers""" - - # 
Handle GL string delimiters first - processed_gl = self.gl_processor.process_gl_string(glstring, redux_type) - if processed_gl != glstring or self.is_glstring(processed_gl): - return processed_gl - - # Remove HLA- prefix for processing the allele - is_hla_prefix = HLA_regex.search(glstring) - if is_hla_prefix: - allele = glstring.split("-")[1] - else: - allele = glstring - - # Handle non GL string - redux_allele = self._redux_non_glstring(allele, glstring, redux_type) - # Add back 'HLA-' prefix when redux is done if needed - if is_hla_prefix: - if "/" in redux_allele: - return "/".join([f"HLA-{ra}" for ra in redux_allele.split("/")]) - redux_allele = f"HLA-{redux_allele}" - return redux_allele - - @staticmethod - def is_glstring(gl_string: str) -> bool: - return ( - "/" in gl_string or "+" in gl_string or "^" in gl_string or "~" in gl_string - ) - - # Delegate methods to handlers - def is_mac(self, allele: str) -> bool: - return self.mac_handler.is_mac(allele) - - def is_serology(self, allele: str) -> bool: - return self.serology_handler.is_serology(allele) - - def is_v2(self, allele: str) -> bool: - return self.v2_handler.is_v2(allele) - - def is_XX(self, glstring: str, loc_antigen: str = None, code: str = None) -> bool: - return self.xx_handler.is_xx(glstring, loc_antigen, code) - - def is_shortnull(self, allele: str) -> bool: - return self.shortnull_handler.is_shortnull(allele) - - def is_null(self, allele: str) -> bool: - return self.shortnull_handler.is_null(allele) - - def expand_mac(self, mac_code: str) -> str: - return self.mac_handler.expand_mac(mac_code) - - def lookup_mac(self, allelelist_gl: str) -> str: - return self.mac_handler.lookup_mac(allelelist_gl) - - def find_broad_splits(self, allele: str) -> tuple: - return self.serology_handler.find_broad_splits(allele) - - def find_associated_antigen(self, serology: str) -> str: - return self.serology_handler.find_associated_antigen(serology) - - def find_xx_from_serology(self, serology: str) -> str: - 
return self.serology_handler.find_xx_from_serology(serology) - - def v2_to_v3(self, v2_allele: str) -> str: - return self.v2_handler.map_v2_to_v3(v2_allele) - - # Keep existing methods that don't fit into handlers - def validate(self, glstring: str) -> bool: - return self.gl_processor.validate_gl_string(glstring) - - def _get_non_strict_allele(self, allele: str) -> str: - """Handle non-strict allele validation""" - if not self._is_allele_in_db(allele): - for expr_char in expression_chars: - if self._is_allele_in_db(allele + expr_char): - if self._config["verbose_log"]: - print(f"{allele} is not valid. Using {allele}{expr_char}") - allele = allele + expr_char - break - return allele - - def _is_who_allele(self, allele: str) -> bool: - return allele in self.allele_group.who_alleles - - def _is_allele_in_db(self, allele: str) -> bool: - return allele in self.allele_group.alleles - - def is_valid_allele(self, allele: str) -> bool: - if allele.endswith(("P", "G")): - allele = allele[:-1] - if "*" in allele: - _, fields = allele.split("*") - if not all(map(str.isalnum, fields.split(":"))): - return False - if self._is_allele_in_db(allele): - return True - else: - allele = get_2field_allele(allele) - return self._is_allele_in_db(allele) - - def _is_valid(self, allele: str) -> bool: - """Validate allele in various forms""" - if allele == "" or allele.endswith("*"): - return False - - if HLA_regex.search(allele): - allele = allele[4:] - - if "*" in allele: - alphanum_allele = allele.replace("*", "").replace(":", "") - if not alphanum_allele.isalnum(): - return False - - if self._config["ignore_allele_with_suffixes"]: - locus, fields = allele.split("*") - if fields in self._config["ignore_allele_with_suffixes"]: - return True - - if not self._config["strict"]: - allele = self._get_non_strict_allele(allele) - - if ( - not self.is_mac(allele) - and not self.is_XX(allele) - and not self.is_serology(allele) - and not self.is_v2(allele) - and not self.is_shortnull(allele) - ): - 
return self.is_valid_allele(allele) - - return True - - # Keep remaining methods unchanged - def is_exp_allele(self, allele: str) -> bool: - return allele in self.allele_group.exp_alleles - - def cwd_redux(self, allele_list_gl: str) -> str: - """CWD reduction using existing logic""" - alleles = [] - for allele in allele_list_gl.split("/"): - if self.is_mac(allele): - alleles.extend(self.expand_mac(allele).split("/")) - elif is_2_field_allele(allele) and not self.is_XX(allele): - alleles.append(allele) - else: - alleles.extend(self.redux(allele, "lgx").split("/")) - - locus = allele_list_gl.split("*")[0] - if HLA_regex.search(locus): - locus = locus.split("-")[1] - ciwd_for_locus = db.load_cwd(self.db_connection, locus) - - alleles_in_ciwd = ciwd_for_locus.intersection(alleles) - return "/".join(sorted(alleles_in_ciwd)) - - def refresh_mac_codes(self) -> None: - dr.generate_mac_codes(self.db_connection, refresh_mac=True) - - def get_db_version(self) -> str: - return dr.get_db_version(self.db_connection) - - def similar_alleles(self, prefix: str) -> Union[List, None]: - """Find similar alleles using existing logic""" - if "*" not in prefix: - return None - - locus, fields = prefix.split("*") - if fields: - if len(fields.split(":")) == 2: - first_field, mac_prefix = fields.split(":") - if mac_prefix.isalpha(): - similar_mac_names = db.similar_mac(self.db_connection, mac_prefix) - if similar_mac_names: - locus_prefix = f"{locus}*{first_field}" - mac_codes = [ - f"{locus_prefix}:{code}" for code in similar_mac_names - ] - return sorted(filter(lambda mac: self.is_mac(mac), mac_codes)) - - similar_allele_names = db.similar_alleles(self.db_connection, prefix) - if similar_allele_names: - return sorted( - similar_allele_names, - key=functools.cmp_to_key(smart_sort.smart_sort_comparator), - ) - - return None diff --git a/tests/unit/simple_table/__init__.py b/tests/unit/simple_table/__init__.py new file mode 100644 index 0000000..e69de29 From 
9c27981b30b408c51d5c22761a36498f32467a71 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 5 Nov 2025 14:42:09 -0600 Subject: [PATCH 23/24] Extract config - Instead of dictionary, create ARDConfig that encapsulates the configuration properties --- pyard/__init__.py | 4 +- pyard/ard.py | 48 +++------- pyard/config.py | 95 +++++++++++++++++++ pyard/handlers/allele_handler.py | 2 +- pyard/handlers/gl_string_processor.py | 6 +- pyard/handlers/shortnull_handler.py | 2 +- pyard/handlers/v2_handler.py | 2 +- pyard/reducers/lg_reducer.py | 2 +- tests/unit/handlers/test_allele_handler.py | 7 +- tests/unit/handlers/test_shortnull_handler.py | 7 +- tests/unit/handlers/test_v2_handler.py | 5 +- tests/unit/reducers/test_all_reducers.py | 4 +- tests/unit/reducers/test_lg_reducer.py | 6 +- 13 files changed, 136 insertions(+), 54 deletions(-) create mode 100644 pyard/config.py diff --git a/pyard/__init__.py b/pyard/__init__.py index d32a95a..ef7daea 100644 --- a/pyard/__init__.py +++ b/pyard/__init__.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - # # py-ard # Copyright (c) 2023 Be The Match operated by National Marrow Donor Program. All Rights Reserved. 
@@ -21,10 +20,11 @@ # > http://www.fsf.org/licensing/licenses/lgpl.html # > http://www.opensource.org/licenses/lgpl-license.php # -from .constants import DEFAULT_CACHE_SIZE # exports for `pyard` from .blender import blender as dr_blender +from .config import ARDConfig +from .constants import DEFAULT_CACHE_SIZE from .misc import get_imgt_db_versions as db_versions __author__ = """NMDP Bioinformatics""" diff --git a/pyard/ard.py b/pyard/ard.py index 6f43db7..daf3409 100644 --- a/pyard/ard.py +++ b/pyard/ard.py @@ -26,21 +26,7 @@ ) from .misc import get_2field_allele, is_2_field_allele from .serology import SerologyMapping - -default_config = { - "reduce_serology": True, - "reduce_v2": True, - "reduce_3field": True, - "reduce_P": True, - "reduce_XX": True, - "reduce_MAC": True, - "reduce_shortnull": True, - "ping": True, - "verbose_log": False, - "ARS_as_lg": False, - "strict": True, - "ignore_allele_with_suffixes": (), -} +from .config import ARDConfig class ARD(object): @@ -57,9 +43,7 @@ def __init__( config: dict = None, ): self._data_dir = data_dir - self._config = default_config.copy() - if config: - self._config.update(config) + self.config = ARDConfig.from_dict(config) # Initialize specialized handlers self._initialize_handlers() @@ -160,7 +144,7 @@ def _redux_allele( self, allele: str, redux_type: VALID_REDUCTION_TYPE, re_ping=True ) -> str: """Core allele reduction with ping logic""" - if not self._config["strict"]: + if not self.config.strict: allele = self._get_non_strict_allele(allele) # Handle P/G suffixes @@ -168,7 +152,7 @@ def _redux_allele( allele = allele[:-1] # Handle ping mode - if self._config["ping"] and re_ping and redux_type in ("lg", "lgx", "U2"): + if self.config.ping and re_ping and redux_type in ("lg", "lgx", "U2"): if allele in self.ars_mappings.p_not_g: not_g_allele = self.ars_mappings.p_not_g[allele] if redux_type == "lg": @@ -206,8 +190,8 @@ def _redux_non_glstring( if "*" in allele: locus, fields = allele.split("*") # Handle ignored 
allele suffixes - if self._config["ignore_allele_with_suffixes"]: - if fields in self._config["ignore_allele_with_suffixes"]: + if self.config.ignore_allele_with_suffixes: + if fields in self.config.ignore_allele_with_suffixes: return allele if locus not in G_GROUP_LOCI: return allele @@ -218,9 +202,7 @@ def _redux_non_glstring( return self.redux(allele, redux_type) # Handle Serology - if self._config["reduce_serology"] and self.serology_handler.is_serology( - allele - ): + if self.config.reduce_serology and self.serology_handler.is_serology(allele): alleles = self.serology_handler.get_alleles_from_serology(allele) if alleles: return self.redux("/".join(alleles), redux_type) @@ -246,7 +228,7 @@ def _redux_non_glstring( # Handle XX codes if ( - self._config["reduce_XX"] + self.config.reduce_XX and code == "XX" and self.xx_handler.is_xx(allele, loc_antigen, code) ): @@ -256,7 +238,7 @@ def _redux_non_glstring( return reduced_alleles # Handle MAC - if self._config["reduce_MAC"] and code.isalpha(): + if self.config.reduce_MAC and code.isalpha(): if self.mac_handler.is_mac(allele): alleles = self.mac_handler.get_alleles(code, loc_antigen) return self.redux("/".join(alleles), redux_type) @@ -264,9 +246,7 @@ def _redux_non_glstring( raise InvalidMACError(f"{glstring} is an invalid MAC.") # Handle short nulls - if self._config["reduce_shortnull"] and self.shortnull_handler.is_shortnull( - allele - ): + if self.config.reduce_shortnull and self.shortnull_handler.is_shortnull(allele): return self.redux("/".join(self.shortnulls[allele]), redux_type) redux_allele = self._redux_allele(allele, redux_type) @@ -349,7 +329,7 @@ def _get_non_strict_allele(self, allele: str) -> str: if not self._is_allele_in_db(allele): for expr_char in expression_chars: if self._is_allele_in_db(allele + expr_char): - if self._config["verbose_log"]: + if self.config.verbose_log: print(f"{allele} is not valid. 
Using {allele}{expr_char}") allele = allele + expr_char break @@ -387,12 +367,12 @@ def _is_valid(self, allele: str) -> bool: if not alphanum_allele.isalnum(): return False - if self._config["ignore_allele_with_suffixes"]: + if self.config.ignore_allele_with_suffixes: locus, fields = allele.split("*") - if fields in self._config["ignore_allele_with_suffixes"]: + if fields in self.config.ignore_allele_with_suffixes: return True - if not self._config["strict"]: + if not self.config.strict: allele = self._get_non_strict_allele(allele) if ( diff --git a/pyard/config.py b/pyard/config.py new file mode 100644 index 0000000..ddc1e8a --- /dev/null +++ b/pyard/config.py @@ -0,0 +1,95 @@ +# -*- coding: utf-8 -*- + +from dataclasses import dataclass +from typing import Tuple + + +@dataclass +class ARDConfig: + """Configuration class for ARD reduction settings""" + + reduce_serology: bool = True + reduce_v2: bool = True + reduce_3field: bool = True + reduce_P: bool = True + reduce_XX: bool = True + reduce_MAC: bool = True + reduce_shortnull: bool = True + ping: bool = True + verbose_log: bool = False + ARS_as_lg: bool = False + strict: bool = True + ignore_allele_with_suffixes: Tuple[str, ...] 
= () + + @classmethod + def from_dict(cls, config_dict: dict) -> "ARDConfig": + """Create ARDConfig from dictionary""" + if not config_dict: + return cls() + + # Filter only valid fields + valid_fields = {f.name for f in cls.__dataclass_fields__.values()} + filtered_config = {k: v for k, v in config_dict.items() if k in valid_fields} + + return cls(**filtered_config) + + def to_dict(self) -> dict: + """Convert ARDConfig to dictionary""" + return { + "reduce_serology": self.reduce_serology, + "reduce_v2": self.reduce_v2, + "reduce_3field": self.reduce_3field, + "reduce_P": self.reduce_P, + "reduce_XX": self.reduce_XX, + "reduce_MAC": self.reduce_MAC, + "reduce_shortnull": self.reduce_shortnull, + "ping": self.ping, + "verbose_log": self.verbose_log, + "ARS_as_lg": self.ARS_as_lg, + "strict": self.strict, + "ignore_allele_with_suffixes": self.ignore_allele_with_suffixes, + } + + @property + def serology_enabled(self) -> bool: + return self.reduce_serology + + @property + def v2_enabled(self) -> bool: + return self.reduce_v2 + + @property + def field3_enabled(self) -> bool: + return self.reduce_3field + + @property + def p_enabled(self) -> bool: + return self.reduce_P + + @property + def xx_enabled(self) -> bool: + return self.reduce_XX + + @property + def mac_enabled(self) -> bool: + return self.reduce_MAC + + @property + def shortnull_enabled(self) -> bool: + return self.reduce_shortnull + + @property + def ping_enabled(self) -> bool: + return self.ping + + @property + def verbose_enabled(self) -> bool: + return self.verbose_log + + @property + def ars_as_lg_enabled(self) -> bool: + return self.ARS_as_lg + + @property + def strict_enabled(self) -> bool: + return self.strict diff --git a/pyard/handlers/allele_handler.py b/pyard/handlers/allele_handler.py index 7afa0c2..b79c180 100644 --- a/pyard/handlers/allele_handler.py +++ b/pyard/handlers/allele_handler.py @@ -68,6 +68,6 @@ def add_lg_suffix(self, redux_allele): [self.add_lg_suffix(allele) for allele in 
redux_allele.split("/")] ) # Use 'ARS' suffix if configured, otherwise use 'g' suffix - if self.ard._config["ARS_as_lg"]: + if self.ard.config.ars_as_lg_enabled: return redux_allele + "ARS" return redux_allele + "g" diff --git a/pyard/handlers/gl_string_processor.py b/pyard/handlers/gl_string_processor.py index 17c1f22..ff493ce 100644 --- a/pyard/handlers/gl_string_processor.py +++ b/pyard/handlers/gl_string_processor.py @@ -50,7 +50,7 @@ def process_gl_string( validate_reduction_type(redux_type) # Validate GL string structure if strict mode is enabled - if self.ard._config["strict"]: + if self.ard.config.strict_enabled: self.validate_gl_string(glstring) # Handle GL string delimiters in order of precedence @@ -115,7 +115,7 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: non_empty_gls, key=functools.cmp_to_key( lambda a, b: self.ard.smart_sort_comparator( - a, b, self.ard._config["ignore_allele_with_suffixes"] + a, b, self.ard.config.ignore_allele_with_suffixes ) ), ) @@ -131,7 +131,7 @@ def _sorted_unique_gl(self, gls: List[str], delim: str) -> str: unique_gls, key=functools.cmp_to_key( lambda a, b: self.ard.smart_sort_comparator( - a, b, self.ard._config["ignore_allele_with_suffixes"] + a, b, self.ard.config.ignore_allele_with_suffixes ) ), ) diff --git a/pyard/handlers/shortnull_handler.py b/pyard/handlers/shortnull_handler.py index e32a9d0..20e074e 100644 --- a/pyard/handlers/shortnull_handler.py +++ b/pyard/handlers/shortnull_handler.py @@ -38,7 +38,7 @@ def is_shortnull(self, allele: str) -> bool: True if the allele is a valid short null and short null reduction is enabled in configuration, False otherwise """ - return allele in self.ard.shortnulls and self.ard._config["reduce_shortnull"] + return allele in self.ard.shortnulls and self.ard.config.shortnull_enabled def is_null(self, allele: str) -> bool: """Check if allele is a null allele diff --git a/pyard/handlers/v2_handler.py b/pyard/handlers/v2_handler.py index 822f38e..108871f 100644 
--- a/pyard/handlers/v2_handler.py +++ b/pyard/handlers/v2_handler.py @@ -44,7 +44,7 @@ def is_v2(self, allele: str) -> bool: """ # Check basic V2 format criteria matches_v2_format = ( - self.ard._config["reduce_v2"] # V2 reduction must be enabled + self.ard.config.v2_enabled # V2 reduction must be enabled and "*" in allele # Must have locus separator and ":" not in allele # Must not have field separators (V3 feature) and allele.split("*")[0] diff --git a/pyard/reducers/lg_reducer.py b/pyard/reducers/lg_reducer.py index b1aa24f..af60cd1 100644 --- a/pyard/reducers/lg_reducer.py +++ b/pyard/reducers/lg_reducer.py @@ -129,6 +129,6 @@ def _add_lg_suffix(self, redux_allele: str) -> str: ) # Add suffix based on configuration - if self.ard._config["ARS_as_lg"]: + if self.ard.config.ars_as_lg_enabled: return redux_allele + "ARS" return redux_allele + "g" diff --git a/tests/unit/handlers/test_allele_handler.py b/tests/unit/handlers/test_allele_handler.py index 80b40c2..d106764 100644 --- a/tests/unit/handlers/test_allele_handler.py +++ b/tests/unit/handlers/test_allele_handler.py @@ -3,6 +3,7 @@ import pytest from unittest.mock import Mock, MagicMock +from pyard import ARDConfig from pyard.handlers.allele_handler import AlleleHandler @@ -13,7 +14,7 @@ class TestAlleleHandler: def mock_ard(self): """Create mock ARD instance""" ard = Mock() - ard._config = {"ARS_as_lg": False} + ard.config = ARDConfig.from_dict({"ARS_as_lg": False}) return ard @pytest.fixture @@ -55,7 +56,7 @@ def test_add_lg_suffix_single_allele_default(self, allele_handler): def test_add_lg_suffix_single_allele_ars(self, mock_ard): """Test add_lg_suffix with single allele using ARS suffix""" - mock_ard._config = {"ARS_as_lg": True} + mock_ard.config = ARDConfig.from_dict({"ARS_as_lg": True}) handler = AlleleHandler(mock_ard) result = handler.add_lg_suffix("A*01:01") assert result == "A*01:01ARS" @@ -67,7 +68,7 @@ def test_add_lg_suffix_multiple_alleles(self, allele_handler): def 
test_add_lg_suffix_multiple_alleles_ars(self, mock_ard): """Test add_lg_suffix with multiple alleles using ARS suffix""" - mock_ard._config = {"ARS_as_lg": True} + mock_ard.config = ARDConfig.from_dict({"ARS_as_lg": True}) handler = AlleleHandler(mock_ard) result = handler.add_lg_suffix("A*01:01/A*01:02") assert result == "A*01:01ARS/A*01:02ARS" diff --git a/tests/unit/handlers/test_shortnull_handler.py b/tests/unit/handlers/test_shortnull_handler.py index 4acf61c..357f305 100644 --- a/tests/unit/handlers/test_shortnull_handler.py +++ b/tests/unit/handlers/test_shortnull_handler.py @@ -3,6 +3,7 @@ import pytest from unittest.mock import Mock +from pyard import ARDConfig from pyard.handlers.shortnull_handler import ShortNullHandler @@ -13,7 +14,7 @@ class TestShortNullHandler: def mock_ard(self): """Create mock ARD instance""" ard = Mock() - ard._config = {"reduce_shortnull": True} + ard.config = ARDConfig.from_dict({"reduce_shortnull": True}) ard.shortnulls = {"A*01:01N", "B*07:02N"} ard.is_mac.return_value = False return ard @@ -35,7 +36,7 @@ def test_is_shortnull_valid_with_config_enabled(self, shortnull_handler): def test_is_shortnull_valid_with_config_disabled(self, mock_ard): """Test is_shortnull with valid short null but config disabled""" - mock_ard._config["reduce_shortnull"] = False + mock_ard.config = ARDConfig.from_dict({"reduce_shortnull": False}) handler = ShortNullHandler(mock_ard) result = handler.is_shortnull("A*01:01N") @@ -95,7 +96,7 @@ def test_is_shortnull_combinations( self, mock_ard, allele, in_shortnulls, config_enabled, expected ): """Test is_shortnull with various combinations of conditions""" - mock_ard._config["reduce_shortnull"] = config_enabled + mock_ard.config = ARDConfig.from_dict({"reduce_shortnull": config_enabled}) mock_ard.shortnulls = {allele} if in_shortnulls else set() handler = ShortNullHandler(mock_ard) diff --git a/tests/unit/handlers/test_v2_handler.py b/tests/unit/handlers/test_v2_handler.py index 1c81a17..63c736c 100644 
--- a/tests/unit/handlers/test_v2_handler.py +++ b/tests/unit/handlers/test_v2_handler.py @@ -3,6 +3,7 @@ import pytest from unittest.mock import Mock, patch +from pyard import ARDConfig from pyard.handlers.v2_handler import V2Handler @@ -13,7 +14,7 @@ class TestV2Handler: def mock_ard(self): """Create mock ARD instance""" ard = Mock() - ard._config = {"reduce_v2": True} + ard.config = ARDConfig.from_dict({"reduce_v2": True}) ard.db_connection = Mock() ard.is_mac.return_value = False ard._is_allele_in_db.return_value = True @@ -39,7 +40,7 @@ def test_is_v2_valid_v2_format(self, mock_v2_to_v3, v2_handler): def test_is_v2_config_disabled(self, mock_ard): """Test is_v2 with V2 reduction disabled""" - mock_ard._config["reduce_v2"] = False + mock_ard.config = ARDConfig.from_dict({"reduce_v2": False}) handler = V2Handler(mock_ard) result = handler.is_v2("A*0101") diff --git a/tests/unit/reducers/test_all_reducers.py b/tests/unit/reducers/test_all_reducers.py index f4e0236..9066ac6 100644 --- a/tests/unit/reducers/test_all_reducers.py +++ b/tests/unit/reducers/test_all_reducers.py @@ -2,6 +2,8 @@ import pytest from unittest.mock import Mock + +from pyard import ARDConfig from pyard.reducers import ( Reducer, GGroupReducer, @@ -30,7 +32,7 @@ def mock_ard(): ard.code_mappings = Mock() ard.code_mappings.who_group = {} ard.db_connection = Mock() - ard._config = {"ARS_as_lg": False} + ard.config = ARDConfig.from_dict({"ARS_as_lg": False}) ard._is_allele_in_db = Mock(return_value=True) ard._is_who_allele = Mock(return_value=False) ard._redux_allele = Mock() diff --git a/tests/unit/reducers/test_lg_reducer.py b/tests/unit/reducers/test_lg_reducer.py index 802b2a6..b02f1f8 100644 --- a/tests/unit/reducers/test_lg_reducer.py +++ b/tests/unit/reducers/test_lg_reducer.py @@ -2,6 +2,8 @@ import pytest from unittest.mock import Mock + +from pyard import ARDConfig from pyard.reducers.lg_reducer import LGXReducer, LGReducer @@ -11,7 +13,7 @@ def mock_ard(): ard = Mock() ard.ars_mappings 
= Mock() ard.ars_mappings.lgx_group = {"A*01:01:01": "A*01:01"} - ard._config = {"ARS_as_lg": False} + ard.config = ARDConfig.from_dict({"ARS_as_lg": False}) return ard @@ -48,7 +50,7 @@ def test_reduce_single_allele_with_g_suffix(self, mock_ard): def test_reduce_single_allele_with_ars_suffix(self, mock_ard): """Test reduction adds 'ARS' suffix when configured""" - mock_ard._config = {"ARS_as_lg": True} + mock_ard.config = ARDConfig.from_dict({"ARS_as_lg": True}) reducer = LGReducer(mock_ard) result = reducer.reduce("A*01:01:01") From 7fbdf8bb3e8695278a08e147194836339766fee9 Mon Sep 17 00:00:00 2001 From: Pradeep Bashyal Date: Wed, 12 Nov 2025 10:32:45 -0600 Subject: [PATCH 24/24] Include csv files in `loader/` directory --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 530305a..ff4fa99 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,6 +5,7 @@ include HISTORY.rst include LICENSE include README.md include pyard/*.csv +include pyard/loader/*.csv include requirements.txt include requirements-tests.txt