Skip to content

Commit

Permalink
Merge c556009 into 042d1e4
Browse files Browse the repository at this point in the history
  • Loading branch information
iskandr committed Nov 3, 2020
2 parents 042d1e4 + c556009 commit f6ed8cd
Show file tree
Hide file tree
Showing 34 changed files with 1,979 additions and 1,336 deletions.
6 changes: 2 additions & 4 deletions mhcgnomes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from .class2_pair import Class2Pair
from .dataframe import dataframe_from_string_list, dataframe_from_parsed_objects
from .errors import ParseError
from .function_api import normalized_string, compact_string, parse
from .function_api import cached_parser, parse
from .gene import Gene
from .haplotype import Haplotype
from .mutation import Mutation
Expand All @@ -43,10 +43,8 @@
"ParseError",
"Serotype",
"Species",

"compact_string",
"dataframe_from_parsed_objects",
"dataframe_from_string_list",
"normalized_string",
"cached_parser",
"parse",
]
2 changes: 1 addition & 1 deletion mhcgnomes/allele.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def restrict_num_allele_fields(
if num_fields >= self.num_allele_fields:
return self
else:
self.copy(
return self.copy(
allele_fields=self.allele_fields[:num_fields],
annotations=[] if drop_annotations else self.annotations,
mutations=[] if drop_mutations else self.mutations)
Expand Down
4 changes: 4 additions & 0 deletions mhcgnomes/class2_pair.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ def get(cls, alpha, beta, raw_string=None):
return None
if beta is None:
return None
if not isinstance(alpha, ResultWithMhcClass):
return None
if not isinstance(beta, ResultWithMhcClass):
return None
if alpha.mhc_class == beta.mhc_class:
mhc_class = alpha.mhc_class
else:
Expand Down
2 changes: 1 addition & 1 deletion mhcgnomes/data/haplotypes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ H2:
- Aa*b
- Ab*b
- Eb*b
- IE*k
# Ea: null
- Qa2*b
- Qa1*a
bc:
Expand Down
136 changes: 8 additions & 128 deletions mhcgnomes/function_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def parse(
map_allele_aliases=MAP_ALLELE_ALIASES,
infer_class2_pairing=INFER_CLASS2_PAIRING,
simplify_haplotypes_if_possible=COLLAPSE_SINGLETON_HAPLOTYPES,
valid_result_types=[],
required_result_types=[],
preferred_result_types=[],
raise_on_error=True):
"""
Parse MHC alleles into a structured representation.
Expand All @@ -68,9 +69,12 @@ def parse(
If a Haplotype contains only a single allele or Class II allele pair,
then return the allele instead of a haplotype.
valid_result_types : list of class
required_result_types : list of type
Only return results of the given classes.
preferred_result_types : list of type
Return a result that's one of these classes if possible, otherwise None.
raise_on_error : bool
Raise an exception if string can't be parsed. If False, return None
instead.
Expand All @@ -84,129 +88,5 @@ def parse(
default_species=default_species,
infer_class2_pairing=infer_class2_pairing,
raise_on_error=raise_on_error,
valid_result_types=valid_result_types)

def normalized_string(
raw_string,
include_species_prefix=True,
use_old_species_prefix=False,
map_allele_aliases=True,
simplify_haplotypes_if_possible=COLLAPSE_SINGLETON_HAPLOTYPES,
infer_class2_pairing=True,
default_species=DEFAULT_SPECIES_PREFIX,
valid_result_types=[],
raise_on_error=True):
"""
Transform MHC alleles into a canonical string representation.
Examples:
A2 -> HLA-A2
A0201 -> HLA-A*02:01
H2-K-k -> H2-Kk
RT-1*9.5:f -> RT1-9.5f
DRB1_0101 -> HLA-DRB1*01:01
Parameters
----------
raw_string : str
String corresponding to allele, locus, or other MHC-related name
include_species_prefix : bool
Include species in the normalized string. If False, then you would
get "A*02:01" for "A0201", instead of "HLA-A*02:01"
use_old_species_prefix : bool
For species which have a newer four-digit code and an older locus
name (such as "Ecqa" / "ELA"), use the older species prefix in the
result.
map_allele_aliases : bool
simplify_haplotypes_if_possible : bool
If a Haplotype contains only a single allele or Class II allele pair,
then return the allele instead of a haplotype.
infer_class2_pairing : bool
If given only the alpha or beta chain of a Class II allele,
try to infer the most likely pairing from population frequencies.
default_species : str
By default, parse alleles like "A*02:01" as human but it's possible
to change this to some other species.
valid_result_types : list of class
Only return results of the given classes.
raise_on_error : bool
Raise an exception if string can't be parsed. If False, return None
instead.
"""
parsed_object = parse(
raw_string,
infer_class2_pairing=infer_class2_pairing,
default_species=default_species,
map_allele_aliases=map_allele_aliases,
raise_on_error=raise_on_error,
simplify_haplotypes_if_possible=simplify_haplotypes_if_possible,
valid_result_types=valid_result_types)
if not parsed_object:
return None
return parsed_object.to_string(
include_species=include_species_prefix,
use_old_species_prefix=use_old_species_prefix)

def compact_string(
raw_string,
use_old_species_prefix=False,
map_allele_aliases=True,
simplify_haplotypes_if_possible=COLLAPSE_SINGLETON_HAPLOTYPES,
infer_class2_pairing=False,
default_species="HLA",
valid_result_types=[],
raise_on_error=True):
"""
Turn HLA-A*02:01 into A0201 or H-2-D-b into H-2Db or
HLA-DPA1*01:05-DPB1*100:01 into DPA10105-DPB110001
Parameters
----------
raw_string : str
String corresponding to allele, locus, or other MHC-related name
use_old_species_prefix : bool
For species which have a newer four-digit code and an older locus
name (such as "Ecqa" / "ELA"), use the older species prefix in the
result.
map_allele_aliases : bool
simplify_haplotypes_if_possible : bool
If a Haplotype contains only a single allele or Class II allele pair,
then return the allele instead of a haplotype.
infer_class2_pairing : bool
If given only the alpha or beta chain of a Class II allele,
try to infer the most likely pairing from population frequencies.
default_species : str
By default, parse alleles like "A*02:01" as human but it's possible
to change this to some other species.
valid_result_types : list of class
Only return results of the given classes.
raise_on_error : bool
Raise an exception if string can't be parsed. If False, return None
instead.
"""
parsed_object = parse(
raw_string,
map_allele_aliases=map_allele_aliases,
infer_class2_pairing=infer_class2_pairing,
default_species=default_species,
raise_on_error=raise_on_error,
simplify_haplotypes_if_possible=simplify_haplotypes_if_possible,
valid_result_types=valid_result_types)
if not parsed_object:
return None
return parsed_object.compact_string(use_old_species_prefix=use_old_species_prefix)
required_result_types=required_result_types,
preferred_result_types=preferred_result_types)
34 changes: 5 additions & 29 deletions mhcgnomes/haplotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
is_valid_restriction,
restrict_alleles,
)
from .result_with_species import ResultWithSpecies
from .class2_locus import Class2Locus
from .class2_pair import Class2Pair
from .result_with_multiple_alleles import ResultWithMultipleAlleles
from .species import Species

class Haplotype(ResultWithSpecies):
class Haplotype(ResultWithMultipleAlleles):
def __init__(
self,
species : Species,
Expand All @@ -32,12 +32,12 @@ def __init__(
locus_restriction : Union[Class2Locus, None] = None,
parent_haplotypes : Union[Sequence["Haplotype"], None] = None,
raw_string : Union[str, None] = None):
ResultWithSpecies.__init__(
ResultWithMultipleAlleles.__init__(
self,
species=species,
name=name,
alleles=alleles,
raw_string=raw_string)
self.name = name
self.alleles = tuple(sorted(alleles))
self.class_restriction = class_restriction
self.locus_restriction = locus_restriction
self.parent_haplotypes = parent_haplotypes
Expand Down Expand Up @@ -75,10 +75,6 @@ def eq_field_names(cls):
def haplotype_name(self):
return self.name

@property
def num_alleles(self):
return len(self.alleles)

def restrict_mhc_class(self, class_restriction, raise_on_error=True):
if class_restriction is None:
return self
Expand Down Expand Up @@ -192,23 +188,3 @@ def to_string(

def compact_string(self, include_species=False):
return self.to_string(include_species=include_species)

@property
def has_allele(self):
return len(self.alleles) > 0

@property
def is_class1(self):
return all([allele.is_class1 for allele in self.alleles])

@property
def is_class2(self):
return all([allele.is_class2 for allele in self.alleles])

@property
def is_class2_alpha(self):
return all([allele.is_class2_alpha for allele in self.alleles])

@property
def is_class2_beta(self):
return all([allele.is_class2_beta for allele in self.alleles])
11 changes: 9 additions & 2 deletions mhcgnomes/mhc_class_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,19 @@ def restrict_alleles(alleles, mhc_class):
]


def normalize_mhc_class_string(mhc_class):
def normalize_mhc_class_string(mhc_class, raise_on_error=True):
original_string = mhc_class
mhc_class = mhc_class.lower()
if mhc_class.startswith("class-"):
_, mhc_class = mhc_class.split("class-")
elif mhc_class.startswith("class "):
_, mhc_class = mhc_class.split("class ")
mhc_class = mhc_class.replace("i", "I")
mhc_class = mhc_class.replace("1", "I")
mhc_class = mhc_class.replace("2", "II")
if mhc_class not in valid_class_restrictions:
raise ParseError("Invalid MHC class: '%s'" % original_string)
if raise_on_error:
raise ParseError("Invalid MHC class: '%s'" % original_string)
else:
return None
return mhc_class

0 comments on commit f6ed8cd

Please sign in to comment.