# ATP6V0C

Data derived from [Mattison KA, et al. (2023) ATP6V0C variants impair V-ATPase function causing a neurodevelopmental disorder often associated with epilepsy. Brain](https://pubmed.ncbi.nlm.nih.gov/36074901/)

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import display, HTML
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.51


In [2]:
PMID = "PMID:36074901"
title = "ATP6V0C variants impair V-ATPase function causing a neurodevelopmental disorder often associated with epilepsy"
cite = Citation(pmid=PMID, title=title)
metadata = MetaData(created_by="ORCID:0000-0002-5648-2155", citation=cite)
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
df = pd.read_csv("input/ATP6V0C_Mattison_2023.txt", sep="\t")

In [4]:
df.head(3)

Unnamed: 0,Patient,Varianta,CADD Scoreb,Inheritance,Seizures (age at onset),Seizure types,Developmental delay,Intellectual disabilityc
0,1d,c.85G > A; p.G29S,26.2,de novo,,,,
1,2e,c.134_135delCT; p.(S45CfsTer37),,de novo,Yes (7 mo),"GTCS, At, FDS, Myo, T",,"Severe, with regression"
2,3,c.143G > C; p.R48P,27.2,de novo,Yes (18 mo),,"Yes, motor and speech",


In [5]:
def extract_hgvs(var_string):
    """
    Variants are expressed using NM_001694.4. in the original publication
    # Note stop-loss c.467A > T (p.*156Leuext*35) 
    """
    fields = var_string.split(";")
    hgvs = fields[0].replace(" ","")
    ## correct HGVS error
    if hgvs == "c.134_135delCT":
        return "c.134_135del"
    elif hgvs == "c.340_355del16":
        return "c.340_355del"
    elif hgvs == "c.352_362delins":
        return "c.352_361del"
        # this is NP_001685.1:p.(Val118ProfsTer11)
    return hgvs

df["NM_001694.4"] = df["Varianta"].apply(lambda x: extract_hgvs(x))

In [6]:
column_mapper_list = list()

In [7]:
import re
def get_onset_age(age_str):
    if not isinstance(age_str, str):
        return "na" #emptry, not available
    match = re.search(r"(\d+) mo", age_str)
    if match:
        months = match.group(1)
        m = int(months)
        if m < 12:
            return f"P{m}M"
        else:
            y = m // 12
            m = m % 12
            return f"P{y}Y{m}M"
        return months
    match = re.search(r"(\d+) yr", age_str)
    if match:
        years = match.group(1)
        y = int(years)
        return f"P{y}Y"
    else:
        return "na"
    

df["age_of_onset"] = df["Seizures (age at onset)"].apply(lambda x: get_onset_age(x))

In [8]:
seizure_types_d = {
 'GTCS': 'Bilateral tonic-clonic seizure',
 'At': 'Atonic seizure',
 'FDS': 'Focal impaired awareness seizure',
 'Myo': 'Myoclonic seizure',
 'T': 'Tonic seizure',
 'Cryptogenic focal': 'Focal-onset seizure',
 'Infantile spasms': 'Infantile spasms',
 'Ab': 'Generalized non-motor (absence) seizure',
 'FOA': 'Focal-onset seizure',
 'Febrile': 'Febrile seizure (within the age range of 3 months to 6 years)',
 'staring': 'Generalized non-motor (absence) seizure',
 'T (nocturnal)': 'Tonic seizure',
 'Focal with secondary generalization': 'Bilateral tonic-clonic seizure with focal onset',
 'focal to bilateral TCS': 'Bilateral tonic-clonic seizure with focal onset',
 'FIA': 'Focal impaired awareness seizure',
 'TCS': 'Bilateral tonic-clonic seizure',
 'multifocal': 'Multifocal seizures',
 'Infantile flexor spasms': 'Infantile spasms',
 'T (w/asymmetrical limb stiffening)': 'Tonic seizure',
 'afebrile': 'Seizure'}
excluded = {}
seizure_typesMapper = OptionColumnMapper(column_name="Seizure types", concept_recognizer=hpo_cr, option_d=seizure_types_d, excluded_d=excluded)
column_mapper_list.append(seizure_typesMapper)
seizure_typesMapper.preview_column(df)

Unnamed: 0,mapping,count
0,Bilateral tonic-clonic seizure (HP:0002069) (observed),16
1,Atonic seizure (HP:0010819) (observed),6
2,Focal impaired awareness seizure (HP:0002384) (observed),2
3,Myoclonic seizure (HP:0032794) (observed),5
4,Tonic seizure (HP:0032792) (observed),5
5,Focal-onset seizure (HP:0007359) (observed),4
6,Infantile spasms (HP:0012469) (observed),3
7,Generalized non-motor (absence) seizure (HP:0002121) (observed),6
8,Febrile seizure (within the age range of 3 months to 6 years) (HP:0002373) (observed),4
9,Bilateral tonic-clonic seizure with focal onset (HP:0007334) (observed),2


In [9]:
developmental_delay_d = {
 'Yes': 'Global developmental delay',
 'motor and speech': 'Global developmental delay',
 'psychomotor': 'Global developmental delay',
 'non-verbal': 'Absent speech',
 'regression to non-verbal': 'Global developmental delay',
 'speaks only in short sentences': 'Delayed speech and language development',
 'speech': 'Delayed speech and language development',
 'fine motor': 'Global developmental delay'}
excluded = {'No':"Global developmental delay"}
developmental_delayMapper = OptionColumnMapper(column_name="Developmental delay", concept_recognizer=hpo_cr, option_d=developmental_delay_d, excluded_d=excluded)
column_mapper_list.append(developmental_delayMapper)
developmental_delayMapper.preview_column(df)

Unnamed: 0,mapping,count
0,Global developmental delay (HP:0001263) (observed),32
1,Absent speech (HP:0001344) (observed),4
2,Global developmental delay (HP:0001263) (excluded),2
3,Delayed speech and language development (HP:0000750) (observed),2


In [10]:
intellectual_disabilityc_d = {
 'Severe': 'Intellectual disability, severe',
 'with regression': 'Intellectual disability, severe',
 'Profound': 'Intellectual disability, profound',
 'Moderate': 'Intellectual disability, moderate',
 'Yes': 'Intellectual disability',
 'Mild': 'Intellectual disability, mild',
 'regression in adulthood': 'Developmental regression'}
excluded = {}
intellectual_disabilitycMapper = OptionColumnMapper(column_name="Intellectual disabilityc", concept_recognizer=hpo_cr, option_d=intellectual_disabilityc_d, excluded_d=excluded)
column_mapper_list.append(intellectual_disabilitycMapper)
intellectual_disabilitycMapper.preview_column(df)

Unnamed: 0,mapping,count
0,"Intellectual disability, severe (HP:0010864) (observed)",5
1,"Intellectual disability, profound (HP:0002187) (observed)",4
2,"Intellectual disability, moderate (HP:0002342) (observed)",1
3,Intellectual disability (HP:0001249) (observed),2
4,"Intellectual disability, mild (HP:0001256) (observed)",4
5,Developmental regression (HP:0002376) (observed),1


In [11]:
vman = VariantManager(df=df, individual_column_name="Patient",
                      gene_symbol="ATP6V0C", allele_1_column_name="NM_001694.4", transcript="NM_001694.4")

In [12]:
EPEO3 = Disease(disease_label="Epilepsy, early-onset, 3, with or without developmental delay", disease_id="OMIM:620465")

ageMapper = AgeColumnMapper.iso8601(column_name="age_of_onset")
sexMapper = SexColumnMapper.not_provided()
individual_column_name = 'Patient'

varMapper = VariantColumnMapper(variant_d=vman.get_variant_d(), variant_column_name="NM_001694.4", default_genotype="heterozygous")

encoder = CohortEncoder(df=df, 
                        hpo_cr=hpo_cr, 
                        column_mapper_list=column_mapper_list, 
                        individual_column_name=individual_column_name,
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        variant_mapper=varMapper,
                        metadata=metadata)
encoder.set_disease(EPEO3)

In [13]:
individuals = encoder.get_individuals()

Could not parse the following as ISO8601 ages: na (n=9)


In [14]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

Level,Error category,Count
ERROR,INSUFFICIENT_HPOS,3
WARNING,REDUNDANT,2

ID,Level,Category,Message,HPO Term
PMID_36074901_1d,ERROR,INSUFFICIENT_HPOS,Minimum HPO terms required 1 but only 0 found,
PMID_36074901_12d,ERROR,INSUFFICIENT_HPOS,Minimum HPO terms required 1 but only 0 found,
PMID_36074901_23d,ERROR,INSUFFICIENT_HPOS,Minimum HPO terms required 1 but only 0 found,


In [15]:
individuals = cvalidator.get_error_free_individual_list()
table = PhenopacketTable(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
2e (UNKNOWN; P7M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.134_135del (heterozygous),"Bilateral tonic-clonic seizure (HP:0002069); Tonic seizure (HP:0032792); Atonic seizure (HP:0010819); Myoclonic seizure (HP:0032794); Intellectual disability, severe (HP:0010864); Focal impaired awareness seizure (HP:0002384)"
3 (UNKNOWN; P1Y6M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.143G>C (heterozygous),Global developmental delay (HP:0001263)
4 (UNKNOWN; ),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.158T>G (heterozygous),Global developmental delay (HP:0001263)
5f (UNKNOWN; ),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.172C>G (heterozygous),Focal-onset seizure (HP:0007359); Global developmental delay (HP:0001263)
6 (UNKNOWN; P8M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.188G>C (heterozygous),"Infantile spasms (HP:0012469); Bilateral tonic-clonic seizure (HP:0002069); Atonic seizure (HP:0010819); Myoclonic seizure (HP:0032794); Global developmental delay (HP:0001263); Absent speech (HP:0001344); Intellectual disability, severe (HP:0010864)"
7 (UNKNOWN; ),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.214del (heterozygous),Infantile spasms (HP:0012469); Global developmental delay (HP:0001263)
8 (UNKNOWN; P1Y),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.220G>T (heterozygous),"Bilateral tonic-clonic seizure (HP:0002069); Global developmental delay (HP:0001263); Generalized non-motor (absence) seizure (HP:0002121); Focal-onset seizure (HP:0007359); Intellectual disability, severe (HP:0010864)"
9 (UNKNOWN; P10M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.283G>A (heterozygous),"Bilateral tonic-clonic seizure (HP:0002069); Global developmental delay (HP:0001263); Febrile seizure (within the age range of 3 months to 6 years) (HP:0002373); Intellectual disability, profound (HP:0002187); Focal-onset seizure (HP:0007359)"
10 (UNKNOWN; P10M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.283G>C (heterozygous),Bilateral tonic-clonic seizure (HP:0002069); Generalized non-motor (absence) seizure (HP:0002121); Global developmental delay (HP:0001263); Absent speech (HP:0001344)
11 (UNKNOWN; P5M),"Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)",NM_001694.4:c.284C>T (heterozygous),"Febrile seizure (within the age range of 3 months to 6 years) (HP:0002373); Generalized non-motor (absence) seizure (HP:0002121); Myoclonic seizure (HP:0032794); Tonic seizure (HP:0032792); Intellectual disability, moderate (HP:0002342); excluded: Global developmental delay (HP:0001263)"


In [16]:
Individual.output_individuals_as_phenopackets(individual_list=individuals, metadata=metadata)

We output 24 GA4GH phenopackets to the directory phenopackets


In [17]:
ingestor = PhenopacketIngestor(indir="phenopackets")
ppkt_d = ingestor.get_phenopacket_dictionary()
ppkt_list = list(ppkt_d.values())

[pyphetools] Ingested 24 GA4GH phenopackets.


In [18]:
builder = HpoaTableBuilder(phenopacket_list=ppkt_list)
hpoa_table_creator = builder.autosomal_dominant(PMID).build()

We found a total of 20 unique HPO terms
Extracted disease: Epilepsy, early-onset, 3, with or without developmental delay (OMIM:620465)


In [19]:
hpoa_table_creator.get_dataframe()

Unnamed: 0,#diseaseID,diseaseName,phenotypeID,phenotypeName,onsetID,onsetName,frequency,sex,negation,modifier,description,publication,evidence,biocuration
0,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0002069,Bilateral tonic-clonic seizure,,,13/13,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
1,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0001263,Global developmental delay,,,21/23,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
2,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0010864,"Intellectual disability, severe",,,4/4,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
3,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0002373,Febrile seizure (within the age range of 3 months to 6 years),,,4/4,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
4,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0002187,"Intellectual disability, profound",,,4/4,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
5,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0007359,Focal-onset seizure,,,4/4,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
6,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0007334,Bilateral tonic-clonic seizure with focal onset,,,2/2,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
7,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0000750,Delayed speech and language development,,,2/2,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
8,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0001249,Intellectual disability,,,2/2,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155
9,OMIM:620465,"Epilepsy, early-onset, 3, with or without developmental delay",HP:0002121,Generalized non-motor (absence) seizure,,,6/6,,,,,PMID:36074901,PCS,ORCID:0000-0002-5648-2155


In [20]:
hpoa_table_creator.write_data_frame()

Wrote HPOA disease file to OMIM-620465.tab
