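# GCSH individuals

This notebook reads the curated template `input/GCSH_individuals.xlsx`, maps the GCSH variants (transcript NM_004483.5), validates the cohort, and writes one GA4GH phenopacket per individual with Multiple mitochondrial dysfunctions syndrome 7 (OMIM:620423).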

In [1]:
import pandas as pd
from IPython.display import display, HTML
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
# NOTE(review): defaultdict is not referenced by any cell visible in this notebook.
from collections import defaultdict
# NOTE(review): wildcard imports. Names that later cells use without an explicit
# import (HpoParser, CaseTemplateEncoder, VariantManager, AllelicRequirement,
# CohortValidator, QcVisualizer, IndividualTable) presumably come from these three
# modules -- confirm before replacing them with explicit imports. Keep the order:
# a later wildcard import shadows any same-named symbol from an earlier one.
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
# Print the installed pyphetools version so it is recorded in the cell output.
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.54


In [2]:
# Curator identifier; handed to the CaseTemplateEncoder as `created_by` further down.
created_by = "ORCID:0000-0002-0736-9199"

# Parse the local HPO JSON file once and pull out the objects the later cells need:
# the concept recognizer (for the encoder), the release version (printed below),
# and the ontology (for cohort validation).
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()

print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
# Load the curated template. As the preview shows, row 0 holds the per-column
# type / HPO id declarations and the individuals start at row 1.
df = pd.read_excel(io="input/GCSH_individuals.xlsx")
df.head(n=2)

Unnamed: 0,PMID,title,individual_id,comment,disease_id,disease_label,transcript,allele_1,allele_2,variant.comment,...,Increased CSF glycine concentration,Hyperglycinemia,Elevated brain lactate level by MRS,Elevated brain glycine level by MRS,Hypotonia,Seizure,Hypernatremia,Metabolic acidosis,Global developmental delay,Miscellaneous
0,str,str,str,optional str,str,str,str,str,str,optional str,...,HP:0500230,HP:0002154,HP:0012707,HP:0034893,HP:0001252,HP:0001250,HP:0003228,HP:0001942,HP:0001263,Miscellaneous
1,PMID: 33890291,"Biallelic start loss variant, c.1A > G in GCSH is associated with variant nonketotic hyperglycinemia",Proband 1,,OMIM:620423,Multiple mitochondrial dysfunctions syndrome 7,NM_004483.5,c.1A>G,c.1A>G,,...,P3Y,P3Y,P3Y,P3Y,excluded,observed,excluded,excluded,observed,Dolichocephaly;Anteverted nares;Open mouth;Gingival overgrowth;Hypertonia;Hyperreflexia;Ankle clonus


In [4]:
# The encoder is built from the template DataFrame; get_individuals() yields the
# cohort that the variant-mapping, validation and export cells operate on.
encoder = CaseTemplateEncoder(
    df=df,
    hpo_cr=hpo_cr,
    created_by=created_by,
)
individuals = encoder.get_individuals()

Created encoders for 28 fields


In [5]:
# Transcript against which the c. HGVS strings in the allele columns are expressed.
GCSH_transcript = "NM_004483.5"

# Both allele columns of the template are passed to the variant manager.
vmanager = VariantManager(
    df=df,
    individual_column_name="individual_id",
    gene_symbol="GCSH",
    transcript=GCSH_transcript,
    allele_1_column_name="allele_1",
    allele_2_column_name="allele_2",
)

In [6]:
# Show the mapped vs. unmapped allele counts; check that nothing is left unmapped
# before the variants are attached to the individuals.
vmanager.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,6,"c.1A>G, c.226C>T, c.170A>G, c.442A>C, c.344C>T, c.293-2_293-1insT"
1,unmapped,0,


In [7]:
# Attach the mapped variants to the encoder's individuals. Nothing is reassigned
# here, so this presumably updates the objects in `individuals` in place -- TODO confirm.
vmanager.add_variants_to_individuals(individuals)

In [8]:
# Validate the cohort against the ontology: at least one HPO term per individual
# (min_hpo=1) and a bi-allelic genotype requirement.
cvalidator = CohortValidator(
    cohort=individuals,
    ontology=hpo_ontology,
    min_hpo=1,
    allelic_requirement=AllelicRequirement.BI_ALLELIC,
)

# Render the validation summary (level / error category / count) as HTML.
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))


Level,Error category,Count
WARNING,REDUNDANT,2


In [9]:
# Replace the cohort with the list returned by get_error_free_individual_list().
# Note that this rebinds `individuals`, so the export cell below works on the
# validator's list rather than on the encoder's original one.
individuals = cvalidator.get_error_free_individual_list()
# Render the retained individuals as an HTML table for visual review.
table = IndividualTable(individuals)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
Proband 1 (MALE; P3Y),Multiple mitochondrial dysfunctions syndrome 7 (OMIM:620423),NM_004483.5:c.1A>G (homozygous),P1M: Lethargy (HP:0001254); Feeding difficulties (HP:0011968); Myoclonic seizure (HP:0032794) P3Y: Secondary microcephaly (HP:0005484); Increased CSF glycine concentration (HP:0500230); Hyperglycinemia (HP:0002154); Elevated brain lactate level by MRS (HP:0012707); Elevated brain glycine level by MRS (HP:0034893) Global developmental delay (HP:0001263); Dolichocephaly (HP:0000268); Anteverted nares (HP:0000463); Open mouth (HP:0000194); Gingival overgrowth (HP:0000212); Hypertonia (HP:0001276); Ankle clonus (HP:0011448); excluded: Hypotonia (HP:0001252); excluded: Hypernatremia (HP:0003228); excluded: Metabolic acidosis (HP:0001942)
Proband 2 (FEMALE; P1Y1M),Multiple mitochondrial dysfunctions syndrome 7 (OMIM:620423),NM_004483.5:c.1A>G (homozygous),P2D: Lethargy (HP:0001254); Feeding difficulties (HP:0011968); Hypotonia (HP:0001252); Seizure (HP:0001250) P3M: Elevated brain lactate level by MRS (HP:0012707); Elevated brain glycine level by MRS (HP:0034893) Hypernatremia (HP:0003228); Metabolic acidosis (HP:0001942); Global developmental delay (HP:0001263)
Proband 3 (FEMALE; P6D),Multiple mitochondrial dysfunctions syndrome 7 (OMIM:620423),NM_004483.5:c.1A>G (homozygous),P3D: Lethargy (HP:0001254) Poor suck (HP:0002033); Partial atrioventricular canal defect (HP:0011577)


In [10]:
# Write the retained individuals out as GA4GH phenopackets; per the recorded
# output they are written to the `phenopackets` directory.
encoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 3 GA4GH phenopackets to the directory phenopackets
