# SC5D individuals

In [1]:
# --- standard library ---
from collections import defaultdict

# --- third-party ---
import pandas as pd
from IPython.display import HTML, display

# --- pyphetools ---
import pyphetools
from pyphetools.creation import (
    AllelicRequirement,
    CaseTemplateEncoder,
    HpoParser,
    VariantManager,
)
from pyphetools.validation import CohortValidator
from pyphetools.visualization import IndividualTable, QcVisualizer

# Never truncate cell contents when rendering DataFrames; the long
# phenotype/variant strings in this notebook must be fully visible.
pd.set_option('display.max_colwidth', None)

# Record the library version in the notebook output for provenance.
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.48


In [2]:
# ORCID iD of the curator; handed to the case-template encoder further down.
created_by = "ORCID:0000-0002-0736-9199"

# Load the HPO from a local JSON release one directory above the notebook and
# pull out the three things the rest of the notebook needs.
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()  # used to build the template encoders
hpo_version = parser.get_version()            # reported below for provenance
hpo_ontology = parser.get_ontology()          # used by the cohort validator

print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
# Load the curated case template (one row per individual) for the SC5D cohort.
# The path is relative to the notebook's working directory.
df = pd.read_excel("input/SC5D_individuals.xlsx")
# Preview only the first two rows. In this template the first data row holds
# per-column type hints / HPO ids rather than an individual, so two rows are
# enough to see that header row plus the first real record.
df.head(2)

Unnamed: 0,PMID,title,individual_id,comment,disease_id,disease_label,transcript,allele_1,allele_2,variant.comment,...,Elevated circulating alkaline phosphatase concentration,Hyperbilirubinemia,Hyperammonemia,Osteoporosis,Elevated circulating lathosterol concentration,Abnormal circulating cholesterol concentration,Anisopoikilocytosis,Butterfly vertebrae,High palate,Axial hypotonia
0,str,str,str,optional str,str,str,str,HGVS str,HGVS str,optional str,...,HP:0003155,HP:0002904,HP:0001987,HP:0000939,HP:0034936,HP:0003107,HP:0004823,HP:0003316,HP:0000218,HP:0008936
1,PMID:12812989,Lathosterolosis: an inborn error of human and murine cholesterol synthesis due to lathosterol 5-desaturase deficiency,atypical SLOS patient,,OMIM:607330,Lathosterolosis,NM_006918.5,c.137A>C,c.137A>C,p.Tyr46Ser; VCV000007356.1,...,na,na,na,na,na,na,na,na,na,


In [4]:
# Build the template encoder from the loaded sheet, the HPO concept recognizer
# and the curator id (`created_by`, an ORCID iD here).
encoder = CaseTemplateEncoder(df=df, hpo_cr=hpo_cr, created_by=created_by)
# Individuals derived from the template; variants are attached in a later cell
# via the VariantManager.
individuals = encoder.get_individuals()

Created encoders for 53 fields


In [5]:
# Reference transcript that the HGVS alleles in the template are expressed against.
SC5D_transcript = "NM_006918.5"

# Collect the alleles found in the two allele columns of the template.
# NOTE(review): the log output of this cell shows requests to the
# VariantValidator REST service for alleles being encoded, so this step
# presumably needs network access when alleles are not already cached -- confirm.
vmanager = VariantManager(
    df=df,
    individual_column_name="individual_id",
    gene_symbol="SC5D",
    transcript=SC5D_transcript,
    allele_1_column_name="allele_1",
    allele_2_column_name="allele_2",
)

[INFO] encoding variant "c.632G>A"
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006918.5%3Ac.632G>A/NM_006918.5?content-type=application%2Fjson
[INFO] encoding variant "c.86G>A"
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006918.5%3Ac.86G>A/NM_006918.5?content-type=application%2Fjson


In [6]:
# Display the mapped / unmapped allele summary. Check that nothing is listed as
# unmapped before attaching variants to individuals.
vmanager.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,3,"c.137A>C, c.632G>A, c.86G>A"
1,unmapped,0,


In [7]:
# Attach the encoded variants to the individuals created from the template.
# NOTE(review): the return value is not used, so this presumably updates the
# objects in `individuals` in place -- confirm against the pyphetools docs.
vmanager.add_variants_to_individuals(individuals)

In [8]:
# Validate the cohort against the ontology. Each individual must have at least
# one HPO term, and a bi-allelic genotype is required (the disease is later
# annotated as autosomal recessive).
cvalidator = CohortValidator(
    cohort=individuals,
    ontology=hpo_ontology,
    min_hpo=1,
    allelic_requirement=AllelicRequirement.BI_ALLELIC,
)

# Render the QC summary (error level / category / count) as an HTML table.
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

Level,Error category,Count
ERROR,CONFLICT,1


In [9]:
# Replace the working cohort with the validator's error-free list.
# NOTE(review): this rebinds `individuals`, so re-running the validation cell
# after this one would validate the already-filtered list instead of the
# encoder output. Re-run from the encoder cell if validation must be repeated.
individuals = cvalidator.get_error_free_individual_list()
# Show one row per individual (disease, genotype, phenotypic features) for a
# final visual check before export.
table = IndividualTable(individuals)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
atypical SLOS patient (UNKNOWN; P0D),Lathosterolosis (OMIM:607330),NM_006918.5:c.137A>C (homozygous),"P0D: Microcephaly (HP:0000252); Cataract (HP:0000518); Short nose (HP:0003196); Micrognathia (HP:0000347); Alveolar ridge overgrowth (HP:0009085); Ambiguous genitalia, male (HP:0000033); 2-3 toe cutaneous syndactyly (HP:0005709) Postaxial foot polydactyly (HP:0001830); Reduced 3-beta-hydroxysteroid-delta-5-desaturase activity in cultured fibroblasts (HP:6000199); excluded: Postaxial hand polydactyly (HP:0001162); excluded: Talipes equinovarus (HP:0001762); excluded: Lumbosacral meningocele (HP:0200133); excluded: 2-4 toe cutaneous syndactyly (HP:0005768)"
Patient 1 (UNKNOWN; ),Lathosterolosis (OMIM:607330),NM_006918.5:c.86G>A (heterozygous) NM_006918.5:c.632G>A (heterozygous),Microcephaly (HP:0000252); Postaxial hand polydactyly (HP:0001162); Chiari type II malformation (HP:0025660); Talipes equinovarus (HP:0001762); Lumbosacral meningocele (HP:0200133); excluded: 2-3 toe cutaneous syndactyly (HP:0005709); excluded: Postaxial foot polydactyly (HP:0001830); excluded: Foam cells with lamellar inclusion bodies (HP:0003609); excluded: 2-4 toe cutaneous syndactyly (HP:0005768); excluded: Horseshoe kidney (HP:0000085); excluded: Bilobate gallbladder (HP:0005608); excluded: Intrahepatic cholestasis (HP:0001406)
Patient 2 (FEMALE; P0D),Lathosterolosis (OMIM:607330),NM_006918.5:c.86G>A (heterozygous) NM_006918.5:c.632G>A (heterozygous),P6Y: Cataract (HP:0000518) P7Y: Micrognathia (HP:0000347) Microcephaly (HP:0000252); Postaxial foot polydactyly (HP:0001830); 2-4 toe cutaneous syndactyly (HP:0005768); Horseshoe kidney (HP:0000085); Bilobate gallbladder (HP:0005608); Intrahepatic cholestasis (HP:0001406); Ptosis (HP:0000508); Prominent nasal tip (HP:0005274); Thick upper lip vermilion (HP:0000215); Narrow forehead (HP:0000341); Epicanthus (HP:0000286); Anteverted nares (HP:0000463); Long philtrum (HP:0000343); Hepatic fibrosis (HP:0001395); Elevated gamma-glutamyltransferase level (HP:0030948); Elevated circulating alanine aminotransferase concentration (HP:0031964); Elevated circulating aspartate aminotransferase concentration (HP:0031956); Elevated circulating alkaline phosphatase concentration (HP:0003155); Hyperbilirubinemia (HP:0002904); Hyperammonemia (HP:0001987); Osteoporosis (HP:0000939); Elevated circulating lathosterol concentration (HP:0034936); Anisopoikilocytosis (HP:0004823); Butterfly vertebrae (HP:0003316); excluded: Short nose (HP:0003196); excluded: 2-3 toe cutaneous syndactyly (HP:0005709); excluded: Postaxial hand polydactyly (HP:0001162); excluded: Chiari type II malformation (HP:0025660); excluded: Talipes equinovarus (HP:0001762); excluded: Lumbosacral meningocele (HP:0200133)


In [10]:
# Write one GA4GH phenopacket per validated individual. No output directory is
# passed here; the run log reports the directory used ("phenopackets").
encoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 3 GA4GH phenopackets to the directory phenopackets


In [11]:
# Helpers needed only for the HPOA export stage of this notebook.
from pyphetools.visualization import HpoaTableBuilder, PhenopacketIngestor

# Read the phenopackets back from the directory written by the export cell.
ingestor = PhenopacketIngestor(indir="phenopackets")
ppkt_d = ingestor.get_phenopacket_dictionary()

# Only the phenopacket objects are needed downstream, not their dictionary keys.
ppkt_list = [*ppkt_d.values()]

[pyphetools] Ingested 3 GA4GH phenopackets.


In [13]:
# Publication cited as the source for the mode-of-inheritance annotation.
PMID = "PMID:12812989"

# Start the HPOA table from the ingested phenopackets and register autosomal
# recessive inheritance. The trailing semicolon suppresses the cell's echo of
# the call's return value.
builder = HpoaTableBuilder(phenopacket_list=ppkt_list)
builder.autosomal_recessive(PMID);

In [15]:
# Finalize the HPOA annotation table from the builder configured above.
hpoa_table_creator = builder.build()

# Keep the HPOA table under its own name. `df` still refers to the input case
# template loaded at the top of the notebook, so re-running the encoder or
# VariantManager cells after this one keeps working on the right frame.
hpoa_df = hpoa_table_creator.get_dataframe()

# Preview the first few annotation rows (disease, HPO term, frequency,
# publication, evidence code, biocurator).
hpoa_df.head(5)

We found a total of 36 unique HPO terms
Extracted disease: Lathosterolosis (OMIM:607330)


Unnamed: 0,#diseaseID,diseaseName,phenotypeID,phenotypeName,onsetID,onsetName,frequency,sex,negation,modifier,description,publication,evidence,biocuration
0,OMIM:607330,Lathosterolosis,HP:0000252,Microcephaly,,,2/2,,,,,PMID:17853487,PCS,ORCID:0000-0002-0736-9199
1,OMIM:607330,Lathosterolosis,HP:0001162,Postaxial hand polydactyly,,,1/2,,,,,PMID:17853487,PCS,ORCID:0000-0002-0736-9199
2,OMIM:607330,Lathosterolosis,HP:0025660,Chiari type II malformation,,,1/2,,,,,PMID:17853487,PCS,ORCID:0000-0002-0736-9199
3,OMIM:607330,Lathosterolosis,HP:0001762,Talipes equinovarus,,,1/2,,,,,PMID:17853487,PCS,ORCID:0000-0002-0736-9199
4,OMIM:607330,Lathosterolosis,HP:0200133,Lumbosacral meningocele,,,1/2,,,,,PMID:17853487,PCS,ORCID:0000-0002-0736-9199


In [16]:
# Persist the HPOA annotation table to disk. No file name is passed here; the
# run log reports the file written (named after the disease id).
hpoa_table_creator.write_data_frame()

Wrote HPOA disease file to OMIM-607330.tab
