# MPV17

Pathogenic variants in MPV17 cause [Mitochondrial DNA depletion syndrome 6 (hepatocerebral type)](https://omim.org/entry/256810). This notebook contains information from several publications.

Note: we infer that the variant reported as IVS3+1 G>T in PMID:22824774 is actually NM_002437.5(MPV17):c.279+1G>T (Allele ID 679728).

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
from IPython.display import display, HTML
import pyphetools
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import CohortValidator
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.65


In [2]:
import ssl
# NOTE(review): this replaces the default HTTPS context factory with the unverified one,
# i.e. TLS certificates are no longer checked for urllib-based HTTPS requests made from
# this kernel. Presumably a workaround for certificate errors while the HPO is fetched --
# confirm it is still required, because it applies to every later download as well.
ssl._create_default_https_context = ssl._create_unverified_context
# Obtain the HPO resources used by the rest of the notebook: the concept recognizer,
# the version string, and the ontology object.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
# Metadata object that is later passed to the cohort encoder and the phenopacket table;
# created_by is given as an ORCID identifier.
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2024-03-06


In [3]:
# Load the curated cohort table. The path is relative, so the notebook has to be run
# from the directory that contains the 'input' folder.
df = pd.read_excel('input/MPV17_curation.xlsx')
#df.head(2)

In [4]:
# Keep the identifier as an ordinary column ('individual_id') and additionally use it
# as the row index, so that a row can be fetched by id with df.loc[...].
df = (
    df
    .assign(individual_id=df['ID'])
    .set_index('ID')
)

In [5]:
def extract_cdna(variant):
    """
    Return the cDNA part of an annotation such as c.772G>T(p.Gly258*).

    Everything from the first open-parenthesis onwards is dropped; afterwards all blanks
    and every occurrence of "p." are removed from what is left.
    """
    cdna, _, _ = variant.partition("(")
    for unwanted in (" ", "p."):
        cdna = cdna.replace(unwanted, "")
    return cdna
    
def extract_variant_1(variants):
    """
    Return the cleaned-up first allele of a genotype string.

    Alleles are separated by a slash ("/"); a string without a slash (homozygous
    notation) is used as a whole.
    """
    first_allele, _, _ = variants.partition("/")
    return extract_cdna(first_allele)

def extract_variant_2(variants):
    """
    Return the cleaned-up second allele of a genotype string.

    When splitting on the slash ("/") yields exactly two parts, the second part is used.
    For any other number of parts (in particular a single, homozygous entry) the entire
    string is passed on unchanged.
    """
    alleles = variants.split("/")
    second_allele = alleles[1] if len(alleles) == 2 else variants
    return extract_cdna(second_allele)
        
# Derive one column per allele from the free-text genotype annotation.
annotations = df['Variant annotation']
df["var1"] = annotations.apply(extract_variant_1)
df["var2"] = annotations.apply(extract_variant_2)

In [6]:
# Transcript accession and HGNC identifier handed to the variant manager for MPV17.
mpv17_transcript = "NM_002437.5"
mpv17_id="HGNC:26827"
# The manager is pointed at the two allele columns created above ("var1"/"var2") and at
# the column holding the individual identifier.
vman = VariantManager(df=df, allele_1_column_name="var1", allele_2_column_name="var2", individual_column_name="individual_id",
                     gene_symbol="MPV17", gene_id=mpv17_id, transcript=mpv17_transcript)

In [7]:
# Show which allele strings are already mapped and which are still unmapped.
vman.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,30,"c.265A>T, c.263A>T, c.461+1G>C, c.271_273del3, c.107A>C, c.67G>C, c.62T>G, c.451dupC, c.191C>G, c.122G>A, c.186+2T>C, c.135delA, c.278A>C, c.293C>T, c.263_265del3, c.70G>T, c.279+1G>T, c.70+5G>A, c.280G>C, c.197T>A, c.22_23insC, c.408T>G, c.130C>T, c.359G>A, c.106C>T, c.262A>G, c.121C>T, c.485C>A, c.277C>T, c.149G>A"
1,unmapped,1,1.5-kbdeletionthatspannedfromintron7wellintoexon8


In [8]:
# The free-text allele below was the only entry reported as unmapped in the previous
# summary, so it is registered as a chromosomal deletion. The key must match the
# whitespace-stripped string produced by extract_cdna exactly.
vman.code_as_chromosomal_deletion({"1.5-kbdeletionthatspannedfromintron7wellintoexon8"})
# Re-display the summary to confirm that nothing is left unmapped.
vman.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,31,"c.265A>T, c.263A>T, c.461+1G>C, c.271_273del3, c.107A>C, c.67G>C, c.62T>G, c.451dupC, c.191C>G, c.122G>A, c.186+2T>C, c.135delA, c.278A>C, c.293C>T, c.263_265del3, c.70G>T, c.279+1G>T, c.70+5G>A, c.280G>C, c.197T>A, c.22_23insC, c.408T>G, c.130C>T, c.359G>A, c.106C>T, c.262A>G, c.121C>T, c.485C>A, c.277C>T, c.149G>A, 1.5-kbdeletionthatspannedfromintron7wellintoexon8"
1,unmapped,0,


In [9]:
# Dictionary of variant objects; further down it is queried with the allele strings
# from the "var1"/"var2" columns when variants are attached to individuals.
variant_d = vman.get_variant_d()
print(f"extracted {len(variant_d)} variants with VariantValidator")

extracted 31 variants with VariantValidator


In [10]:
import math
def decode_age_of_dx(dx_age):
    """
    Convert a free-text age such as "6 months" or "newborn" into an ISO 8601 duration.

    Supported input:
      * "newborn"          -> "P1D"
      * "<n> day(s)"       -> "P<n>D"
      * "<n> week(s)"      -> "P<n>W" ("7 weeks" is written as "P1M3W")
      * "<n> month(s)"     -> "P<n>M", or "P<y>Y<m>M" from 12 months onwards
      * "<n>.5 month(s)"   -> "P<n>M2W" (half a month is encoded as two weeks)
      * "<n> year(s)"      -> "P<n>Y"
    The misspellings "monhts" and "montns" are accepted as "months".

    :param dx_age: either "newborn" or a string of the form "<number> <unit>"
    :returns: the ISO 8601 duration string
    :raises ValueError: if the string does not have two whitespace-separated parts, uses
        an unknown unit, or contains a month count that cannot be interpreted
    """
    if dx_age == "newborn":
        return "P1D"
    fields = dx_age.split()
    if len(fields) != 2:
        raise ValueError(f"Malformed age string \"{dx_age}\"")
    num, time_elem = fields
    if time_elem in ("months", "month", "monhts", "montns"):
        if "." in num:
            whole, _, fraction = num.partition(".")
            # Only half months are supported; anything else must fail loudly instead of
            # silently returning None (the exception was previously built but not raised).
            if fraction == "5" and whole.isdigit():
                return f"P{whole}M2W"
            raise ValueError(f"could not parse months \"{num}\"")
        m = int(num)
        if m > 11:
            # Express 12 months or more as years plus remaining months.
            y, m = divmod(m, 12)
            return f"P{y}Y{m}M"
        return f"P{num}M"
    if time_elem in ("years", "year"):
        return f"P{num}Y"
    if time_elem in ("weeks", "week"):
        if num == "7":
            # Seven weeks are deliberately written as one month plus three weeks.
            return "P1M3W"
        return f"P{num}W"
    if time_elem in ("days", "day"):
        return f"P{num}D"
    raise ValueError(f"Unrecognized time unit in age string \"{dx_age}\"")

# Encode every age of diagnosis as an ISO 8601 duration in a new "age" column.
df["age"] = df['Age of diagnosis'].transform(decode_age_of_dx)

In [11]:
# The "age" column created above already holds ISO 8601 durations, hence the iso8601 mapper.
ageEncoder = AgeColumnMapper.iso8601(column_name="age")
#ageEncoder.preview_column(df["age"])

In [12]:
# Sex is read from the 'Gender' column with "M"/"F" as symbols.
# NOTE(review): unknown_symbol="nan" presumably targets empty spreadsheet cells -- confirm
# that the mapper compares against the string form of the cell value.
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", unknown_symbol="nan", column_name="Gender")
#sexMapper.preview_column(df['Gender']).head()

In [13]:
# Mapper for the free-text 'Phenotype' column. It relies on the HPO concept recognizer
# only; no manual option mappings are supplied (option_d is empty).
phenotypeColumnMapper = OptionColumnMapper(
    column_name='Phenotype',
    concept_recognizer=hpo_cr,
    option_d={},
)
# Start the list of column mappers that is handed to the cohort encoder later on.
column_mapper_list = [phenotypeColumnMapper]
phenotypeColumnMapper.preview_column(df)

Unnamed: 0,mapping,count
0,Distal lower limb muscle weakness (HP:0009053) (observed),3
1,Motor regression (HP:0033044) (observed),1
2,Mental deterioration (HP:0001268) (observed),1
3,Positive Romberg sign (HP:0002403) (observed),2
4,Impaired distal tactile sensation (HP:0006937) (observed),1
...,...,...
153,Aphasia (HP:0002381) (observed),1
154,Memory impairment (HP:0002354) (observed),1
155,Irritability (HP:0000737) (observed),1
156,Abolished vibration sense (HP:0006944) (observed),1


In [14]:
# Same approach for the 'Prenatal ultrasound phenotype' column: concept recognizer only,
# no manual option mappings.
# NOTE: append() makes this cell non-idempotent -- re-running it adds the mapper twice.
prenatalUSmapper =  OptionColumnMapper(column_name='Prenatal ultrasound phenotype',concept_recognizer=hpo_cr, option_d={})
column_mapper_list.append(prenatalUSmapper)
prenatalUSmapper.preview_column(df)

Unnamed: 0,mapping,count
0,Intrauterine growth retardation (HP:0001511) (observed),1


In [15]:
# Same approach for the 'MRI phenotype' column: concept recognizer only, no manual
# option mappings.
# NOTE: append() makes this cell non-idempotent -- re-running it adds the mapper twice.
mriMapper = OptionColumnMapper(column_name='MRI phenotype',concept_recognizer=hpo_cr, option_d={})
column_mapper_list.append(mriMapper)
mriMapper.preview_column(df)

Unnamed: 0,mapping,count
0,Abnormal cerebral white matter morphology (HP:0002500) (observed),2
1,Hyperintensity of cerebral white matter on MRI (HP:0030890) (observed),3
2,Subdural hemorrhage (HP:0100309) (observed),1
3,Periventricular leukomalacia (HP:0006970) (observed),1
4,Leukodystrophy (HP:0002415) (observed),3


In [16]:
# No data for cardiac
#cardiacMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d={})
#cardiacMapper.preview_column(df['Cardiac phenotype'])
#mapper_d['Cardiac phenotype'] = cardiacMapper

In [17]:
# Lookup table from age-at-death strings to ISO 8601 durations.
# The misspelt key "6 monhts" is listed on purpose next to "6 months" -- presumably it
# occurs verbatim in the 'Age at death' column. Each key appears exactly once.
aod_d = {
    # years
    "6 years": "P6Y",
    "5 years": "P5Y",
    "4 years": "P4Y",
    "3 years": "P3Y",
    # months (12 or more are written as years plus months, half months as two weeks)
    "4 months": "P4M",
    "1 month": "P1M",
    "14 months": "P1Y2M",
    "15 months": "P1Y3M",
    "6 monhts": "P6M",
    "6 months": "P6M",
    "8 months": "P8M",
    "6.5 months": "P6M2W",
    "4.5 months": "P4M2W",
    "7.5 months": "P7M2W",
    "9 months": "P9M",
    "12 months": "P1Y",
    "5 months": "P5M",
    "11 months": "P11M",
    "16 months": "P1Y4M",
    "22 months": "P1Y10M",
    "30 months": "P2Y6M",
    "28 months": "P2Y4M",
}
# Age of death is taken from the 'Age at death' column and translated with the table above.
aodMapper = AgeOfDeathColumnMapper(column_name='Age at death', string_to_iso_d=aod_d)

In [18]:
# All individuals in this cohort are assigned the same OMIM disease.
mdds6 = Disease(disease_id="OMIM:256810", disease_label="Mitochondrial DNA depletion syndrome 6 (hepatocerebral type)")
# Keyed by the bare OMIM number as a string.
# NOTE(review): presumably this must match the values of the "omim_id" column after
# string conversion -- confirm, since Excel may deliver that column as integers.
disease_d = {"256810": mdds6}
diseaseMapper = DiseaseIdColumnMapper(column_name="omim_id", disease_id_map=disease_d)

In [20]:
# Bring everything together: the dataframe, the HPO resources, the phenotype column
# mappers, and the dedicated mappers for disease, sex, age and age of death.
# The PMID and title of the source publication are read from the "PMID" and "title" columns.
encoder = MixedCohortEncoder(df=df,
                            hpo_cr=hpo_cr,
                             hpo_ontology=hpo_ontology,
                             column_mapper_list=column_mapper_list,
                             individual_column_name="individual_id",
                             disease_id_mapper=diseaseMapper,
                             pmid_column="PMID",
                             title_column="title",
                             sexmapper=sexMapper,
                             agemapper=ageEncoder,
                             age_of_death_mapper=aodMapper,
                             metadata=metadata)

In [21]:
# Individual objects produced by the encoder; variants are attached to them in the next step.
individuals = encoder.get_individuals()

In [22]:
# Attach the genotype to every individual.
# The individual id doubles as the index of the pandas dataframe, so the matching row
# (and with it the two allele strings) is fetched with df.loc. Identical allele strings
# are recorded as a single homozygous variant; different strings are recorded as two
# heterozygous variants.
for individual in individuals:
    row = df.loc[individual.id]
    allele_1, allele_2 = row['var1'], row['var2']
    first_variant = variant_d.get(allele_1)
    if allele_1 == allele_2:
        first_variant.set_homozygous()
        individual.add_variant(first_variant)
        continue
    second_variant = variant_d.get(allele_2)
    first_variant.set_heterozygous()
    second_variant.set_heterozygous()
    individual.add_variant(first_variant)
    individual.add_variant(second_variant)


In [23]:
# Quality control of the cohort: validated with min_hpo=1 and a bi-allelic allelic
# requirement; the findings are rendered as an HTML summary table.
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.BI_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

Level,Error category,Count
WARNING,REDUNDANT,6


In [24]:
# Continue with the list returned by the validator. This rebinds `individuals`, so the
# unvalidated list from the encoder is no longer reachable under that name.
individuals = cvalidator.get_error_free_individual_list()
# Render the cohort (individual, disease, genotype, phenotypic features) as an HTML table.
table = PhenopacketTable(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
34979697_P1 (FEMALE; P1Y6M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.293C>T (homozygous),Distal lower limb muscle weakness (HP:0009053); Motor regression (HP:0033044); Mental deterioration (HP:0001268); Positive Romberg sign (HP:0002403); Impaired distal tactile sensation (HP:0006937); Foot dorsiflexor weakness (HP:0009027); Lower limb hyperreflexia (HP:0002395); Increased circulating lactate concentration (HP:0002151); Hyperammonemia (HP:0001987); Increased CSF protein concentration (HP:0002922); Abnormal cerebral white matter morphology (HP:0002500)
34035203_P1 (FEMALE; P1Y5M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.451dup (heterozygous) NM_002437.5:c.293C>T (heterozygous),Small for gestational age (HP:0001518); Microcephaly (HP:0000252); Delayed gross motor development (HP:0002194); Increased circulating lactate concentration (HP:0002151); Increased total bilirubin (HP:0003573); Abnormality of the coagulation cascade (HP:0003256); Increased hepatic echogenicity (HP:0031141); Hyperintensity of cerebral white matter on MRI (HP:0030890)
34023347_P1 (MALE; P1Y6M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.293C>T (homozygous),Acute hepatic failure (HP:0006554); Intellectual disability (HP:0001249)
33486010_P1 (MALE; P6M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.121C>T (homozygous),Global developmental delay (HP:0001263); Peripheral neuropathy (HP:0009830); Scoliosis (HP:0002650); Cholestasis (HP:0001396); Hepatomegaly (HP:0002240); Hypoglycemia (HP:0001943); Cirrhosis (HP:0001394)
31664948_P1 (FEMALE; P4M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.461+1G>C (homozygous),Jaundice (HP:0000952); Muscle weakness (HP:0001324); Neurodevelopmental delay (HP:0012758); Poor suck (HP:0002033); Weak cry (HP:0001612); Cirrhosis (HP:0001394); Seizure (HP:0001250); Failure to thrive (HP:0001508); Hypertyrosinemia (HP:0003231)
31664948_P2 (MALE; P5M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.277C>T (homozygous),Malaise (HP:0033834); Prolonged neonatal jaundice (HP:0006579); Dark urine (HP:0040319); Poor suck (HP:0002033); Neurodevelopmental delay (HP:0012758); Hyperbilirubinemia (HP:0002904); Hypertyrosinemia (HP:0003231); Hyperphenylalaninemia (HP:0004923)
29318572_P1 (FEMALE; P4M),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.106C>T (homozygous),Failure to thrive (HP:0001508); Jaundice (HP:0000952); Hypoglycemia (HP:0001943); Infantile axial hypotonia (HP:0009062); Muscle weakness (HP:0001324); Hepatomegaly (HP:0002240); Hepatic steatosis (HP:0001397); Conjugated hyperbilirubinemia (HP:0002908); Increased circulating lactate concentration (HP:0002151); Hypoalbuminemia (HP:0003073); Abnormality of the coagulation cascade (HP:0003256); Intrahepatic cholestasis (HP:0001406); Periportal fibrosis (HP:0001405)
29318572_P2 (FEMALE; P6M14D),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.106C>T (homozygous),Severe failure to thrive (HP:0001525); Jaundice (HP:0000952); Hypoglycemia (HP:0001943); Hepatosplenomegaly (HP:0001433); Hypotonia (HP:0001252); Hyporeflexia (HP:0001265); Conjugated hyperbilirubinemia (HP:0002908); Increased circulating lactate concentration (HP:0002151); Abnormality of the coagulation cascade (HP:0003256)
28673863_P1 (FEMALE; P11Y),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.121C>T (homozygous),Myalgia (HP:0003326); Lower limb muscle weakness (HP:0007340); Postural instability (HP:0002172); Vomiting (HP:0002013); Abdominal distention (HP:0003270); Muscle spasm (HP:0003394); Failure to thrive (HP:0001508); Cholelithiasis (HP:0001081); Lower limb asymmetry (HP:0100559); Upper limb asymmetry (HP:0100560); Hepatomegaly (HP:0002240); Cognitive impairment (HP:0100543); Decreased Achilles reflex (HP:0009072); Impaired vibratory sensation (HP:0002495); Positive Romberg sign (HP:0002403); Incoordination (HP:0002311); Elevated circulating aspartate aminotransferase concentration (HP:0031956); Increased circulating lactate concentration (HP:0002151); Increased serum pyruvate (HP:0003542); EMG: myopathic abnormalities (HP:0003458); Hyperintensity of cerebral white matter on MRI (HP:0030890)
28209105_P1 (MALE; P2Y),Mitochondrial DNA depletion syndrome 6 (hepatocerebral type) (OMIM:256810),NM_002437.5:c.149G>A (homozygous),Failure to thrive (HP:0001508); Elevated circulating alkaline phosphatase concentration (HP:0003155); Elevated circulating aspartate aminotransferase concentration (HP:0031956); Hypoalbuminemia (HP:0003073); Hypoglycemia (HP:0001943); Hypofibrinogenemia (HP:0011900); Abdominal distention (HP:0003270); Abnormal stool composition (HP:0031685); Cirrhosis (HP:0001394); Respiratory insufficiency due to muscle weakness (HP:0002747)


In [25]:
# Write the validated individuals out as GA4GH phenopackets; the printed message reports
# how many were written and the target directory.
MixedCohortEncoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 50 GA4GH phenopackets to the directory phenopackets
