# ISCA2

Pathogenic variants in ISCA2 cause [Multiple mitochondrial dysfunctions syndrome 4 ](https://omim.org/entry/616370). This notebook contains information from several publications.

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
from IPython.display import display, HTML
import pyphetools
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import CohortValidator
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.15


In [2]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2023-10-09


In [3]:
df = pd.read_excel('input/ISCA2_curation.xlsx')
#df.head(2)

In [4]:
df['patient_id'] = df['ID']
df.set_index('patient_id', inplace=True)

In [5]:
def extract_cdna(variant):
    """
    split strings like c.772G>T(p.Gly258*) on the open-parenthesis symbol and return the first part
    """
    v = variant.split("(")[0]
    v = v.replace(" ", "").replace("p.","")
    return v
    
def extract_variant_1(variants):
    """
    Split on the slash ("/") and return the first part (or entire string for homozygous)
    """
    v1 = variants.split("/")[0]
    return extract_cdna(v1)

def extract_variant_2(variants):
    """
    Split on the slash ("/") and return the second part (or entire string for homozygous)
    """
    fields = variants.split("/")
    if len(fields) == 2:
        return extract_cdna(fields[1])
    else:
        # there was only one variant
        return extract_cdna(variants)

In [6]:
df["var1"] = df['Variant annotation'].apply(lambda x: extract_variant_1(x))
df["var2"] = df['Variant annotation'].apply(lambda x: extract_variant_2(x))

In [7]:
from time import sleep
var1_list = df["var1"].unique()
var2_list = df["var2"].unique()
var_set = set()
var_set.update(var1_list)
var_set.update(var2_list)
variant_d = {}
hg38 = "hg38"
inca2_transcript = "NM_194279.3"
vvalidator = VariantValidator(genome_build=hg38, transcript=inca2_transcript)
for v in var_set:
    print(f"{v}")
    var = vvalidator.encode_hgvs(v)
    variant_d[v] = var
    sleep(1)
print(f"extracted {len(variant_d)} variants with VariantValidator")

c.355G>A
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.355G>A/NM_194279.3?content-type=application%2Fjson
c.229G>A
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.229G>A/NM_194279.3?content-type=application%2Fjson
c.413C>G
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.413C>G/NM_194279.3?content-type=application%2Fjson
c.295delT
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.295delT/NM_194279.3?content-type=application%2Fjson
c.5C>A
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.5C>A/NM_194279.3?content-type=application%2Fjson
c.334A>G
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_194279.3%3Ac.334A>G/NM_194279.3?content-type=application%2Fjson
extracted 6 variants with VariantValidator


In [8]:
df['age in months']= df['Age of diagnosis'].apply(lambda x: x.split(" ")[0])
ageMapper = AgeColumnMapper.by_month(column_name='age in months')
#ageMapper.preview_column(df['age in months'])

In [9]:
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", unknown_symbol="nan", column_name="Gender")
#sexMapper.preview_column(df['Gender']).head()

# Mapping phenotypic features

In [10]:
mapper_d = {}
phenotypeColumnMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d={})
phenotypeColumnMapper.preview_column(df['Phenotype'])
mapper_d['Phenotype'] = phenotypeColumnMapper
# phenotypeColumnMapper.preview_column(df['Phenotype'])

In [11]:
# This column conttains no data in the original file
prenatalUSmapper =  OptionColumnMapper(concept_recognizer=hpo_cr, option_d={})
#prenatalUSmapper.preview_column(df['Prenatal ultrasound phenotype'])
#mapper_d['Prenatal ultrasound phenotype'] = prenatalUSmapper

In [12]:
mriMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d={})
mriMapper.preview_column(df['MRI phenotype'])
mapper_d['MRI phenotype'] = mriMapper

In [13]:
# This column conttains no data in the original file
cardiacMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d={})
#cardiacMapper.preview_column(df['Cardiac phenotype'])
#mapper_d['Cardiac phenotype'] = cardiacMapper

In [14]:
aod_d = {
    "9 months": "P9M",
    "3 months": "P3M",
    "28 months": "P2Y4M",
    "26 months": "P2Y2M",
    "16 months": "P1Y4M",
    "11 months": "P11M",
}
aodMapper = AgeOfDeathColumnMapper(column_name='Age at death', string_to_iso_d=aod_d)

In [15]:
mdds4 = Disease(disease_id="OMIM:616370", disease_label="Mitochondrial DNA depletion syndrome 4")
disease_d = {"616370": mdds4}
diseaseMapper = DiseaseIdColumnMapper(column_name="omim_id", disease_id_map=disease_d)

In [16]:
encoder = MixedCohortEncoder(df=df,
                            hpo_cr=hpo_cr,
                             column_mapper_d=mapper_d,
                             individual_column_name="patient_id",
                             disease_id_mapper=diseaseMapper,
                             pmid_column="PMID",
                             title_column="title",
                             sexmapper=sexMapper,
                             agemapper=ageMapper,
                             age_of_death_mapper=aodMapper,
                             metadata=metadata
                        )

In [17]:
individuals = encoder.get_individuals()
# retrieve the variant strings and add Variant objects to each individual
# the individual id (i.id) is also the index of the pandas dataframe
for i in individuals:
    row = df.loc[i.id] 
    v1 = row['var1']
    v2 = row['var2']
    #print(f"{i.id}: v1={v1} and v2={v2}")
    if v1 == v2:
        var1 = variant_d.get(v1)
        var1.set_homozygous()
        i.add_variant(var1)
    else:
        var1 = variant_d.get(v1)
        var2 = variant_d.get(v2)
        var1.set_heterozygous()
        var2.set_heterozygous()
        i.add_variant(var1)
        i.add_variant(var2)

In [18]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.BI_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

Level,Error category,Count
WARNING,REDUNDANT,1


In [19]:
individuals = cvalidator.get_error_free_individual_list()
table = PhenopacketTable(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
29297947_P1 (MALE; P3M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Postnatal growth retardation (HP:0008897); Failure to thrive (HP:0001508); Diffuse optic disc pallor (HP:0012512); Visual impairment (HP:0000505); Low-set ears (HP:0000369); Wide nasal bridge (HP:0000431); Nasogastric tube feeding (HP:0040288); Global developmental delay (HP:0001263); Developmental regression (HP:0002376); Generalized hypotonia (HP:0001290); Spasticity (HP:0001257); Increased CSF glycine concentration (HP:0500230); Increased CSF glutamate concentration (HP:0500200); Increased CSF lactate (HP:0002490); Hyperglycinuria (HP:0003108); Leukodystrophy (HP:0002415); Hyperintensity of cerebral white matter on MRI (HP:0030890); Hyperintensity of MRI T2 signal of the spinal cord (HP:0040272)
29297947_P2 (FEMALE; P6M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Nystagmus (HP:0000639); Leg muscle stiffness (HP:0008969); Motor regression (HP:0033044); Postnatal growth retardation (HP:0008897); Macrocephaly (HP:0000256); Diffuse optic disc pallor (HP:0012512); Visual impairment (HP:0000505); Feeding difficulties in infancy (HP:0008872); Generalized joint laxity (HP:0002761); Short 4th metacarpal (HP:0010044); Cutaneous syndactyly of toes (HP:0010621); Global developmental delay (HP:0001263); Hypotonia (HP:0001252); Spasticity (HP:0001257); Leukodystrophy (HP:0002415); Hyperintensity of cerebral white matter on MRI (HP:0030890)
29359243_P1 (FEMALE; P2M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.297del (heterozygous) NM_194279.3:c.334A>G (heterozygous),Severe muscular hypotonia (HP:0006829); Nystagmus (HP:0000639); Motor regression (HP:0033044); Feeding difficulties (HP:0011968); Brisk reflexes (HP:0001348); Respiratory failure requiring assisted ventilation (HP:0004887); Increased CSF lactate (HP:0002490); Abnormal cerebral cortex morphology (HP:0002538); Abnormal diffusion weighted cerebral MRI morphology (HP:0032615); Abnormality of the internal capsule (HP:0012502)
31279336_P1 (MALE; P7M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.355G>A (homozygous),Malaise (HP:0033834); Insomnia (HP:0100785); Irritability (HP:0000737); Muscle stiffness (HP:0003552); Hypotonia (HP:0001252); Abnormal periventricular white matter morphology (HP:0002518); Abnormal cerebellum morphology (HP:0001317)
32424628_P1 (MALE; P11M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.5C>A (heterozygous) NM_194279.3:c.413C>G (heterozygous),Spastic tetraparesis (HP:0001285); Optic atrophy (HP:0000648); Cognitive impairment (HP:0100543); Developmental regression (HP:0002376); Poor appetite (HP:0004396); Vomiting (HP:0002013); Nystagmus (HP:0000639); Encephalopathy (HP:0001298); Arm dystonia (HP:0031960); Abnormal pyramidal sign (HP:0007256); Increased serum lactate (HP:0002151); Increased CSF lactate (HP:0002490); Hyperreflexia (HP:0001347); Periventricular white matter hyperintensities (HP:0030891); Thick corpus callosum (HP:0007074); Central nervous system cyst (HP:0030724)
25539947_P1 (FEMALE; P7M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Developmental regression (HP:0002376); Irritability (HP:0000737); Insomnia (HP:0100785); Spasticity (HP:0001257); Optic atrophy (HP:0000648); Hyperreflexia (HP:0001347); Periventricular white matter hyperintensities (HP:0030891); Abnormal corpus callosum morphology (HP:0001273); Abnormal midbrain morphology (HP:0002418)
29122497_P1 (FEMALE; P6M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Developmental regression (HP:0002376); Optic atrophy (HP:0000648); Nystagmus (HP:0000639); Axial hypotonia (HP:0008936); Lower limb spasticity (HP:0002061); Upper limb spasticity (HP:0006986); Confluent hyperintensity of cerebral white matter on MRI (HP:0040330)
29122497_P2 (FEMALE; P7M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Developmental regression (HP:0002376); Optic atrophy (HP:0000648); Nystagmus (HP:0000639); Axial hypotonia (HP:0008936); Lower limb spasticity (HP:0002061); Upper limb spasticity (HP:0006986); Confluent hyperintensity of cerebral white matter on MRI (HP:0040330); Hyperintensity of MRI T2 signal of the spinal cord (HP:0040272)
29122497_P3 (FEMALE; P4M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Developmental regression (HP:0002376); Optic atrophy (HP:0000648); Nystagmus (HP:0000639); Axial hypotonia (HP:0008936); Lower limb spasticity (HP:0002061); Upper limb spasticity (HP:0006986); Confluent hyperintensity of cerebral white matter on MRI (HP:0040330); Hyperintensity of MRI T2 signal of the spinal cord (HP:0040272)
29122497_P4 (FEMALE; P5M),Mitochondrial DNA depletion syndrome 4 (OMIM:616370),NM_194279.3:c.229G>A (homozygous),Developmental regression (HP:0002376); Optic atrophy (HP:0000648); Nystagmus (HP:0000639); Axial hypotonia (HP:0008936); Lower limb spasticity (HP:0002061); Upper limb spasticity (HP:0006986); Confluent hyperintensity of cerebral white matter on MRI (HP:0040330); Hyperintensity of MRI T2 signal of the spinal cord (HP:0040272)


In [20]:
MixedCohortEncoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 16 GA4GH phenopackets to the directory phenopackets
