<h1>Acromelic Frontonasal Dysostosis (ZSWIM6): Smith et al 2014</h1>
<p>Extract the clinical data from <a href="https://pubmed.ncbi.nlm.nih.gov/25105228/" target="_blank">Smith JD, et al. (2014) Exome sequencing identifies a recurrent de novo ZSWIM6 mutation associated with acromelic frontonasal dysostosis. Am J Hum Genet 95(2):235-40. PMID: 25105228</a>.</p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
import math
from csv import DictReader
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
import re
import pyphetools
from pyphetools.creation import *
from pyphetools.visualization import *
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.6.3


In [2]:
# Citation details for the source publication; reused by later cells.
pmid = "PMID:25105228"
title = "Exome sequencing identifies a recurrent de novo ZSWIM6 mutation associated with acromelic frontonasal dysostosis"

# The HPO parser supplies both the concept recognizer used to look up terms
# by label and the ontology version that is recorded in the metadata.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()

# Metadata attached to every phenopacket created in this notebook.
metadata = MetaData(
    created_by="ORCID:0000-0002-0736-9199",
    pmid=pmid,
    pubmed_title=title,
)
metadata.default_versions_with_hpo(version=hpo_version)


In [3]:
# The input file is tab-separated despite its .csv extension.
df = pd.read_csv("input/smith_2014.csv", sep="\t")

In [4]:
# Show the loaded table: one row per clinical feature, one column per individual.
df

Unnamed: 0,Feature,Individual 1,Individual 2,Individual 3,Individual 4
0,Periventricular nodular heterotopia,no,yes,no,no
1,Ventriculomegaly,yes,no,no,yes
2,Choroid plexus cyst,yes,yes,no,no
3,Dilation of Virchow-Robin spaces,no,yes,no,no
4,Agenesis of corpus callosum,no,yes,no,no
5,Hypoplasia of the corpus callosum,yes,no,no,no
6,Absent septum pellucidum,no,no,yes,no
7,Calcification of falx cerebri,yes,yes,yes,no
8,Midline central nervous system lipomas,yes,yes,yes,yes
9,Aplasia of the olfactory bulb,yes,yes,?,?


# return HpTerm(hpo_id=hpo_id, label=hpo_label, observed=False, onset=onset, resolution=offset) 
<h2>Phenotypic data</h2>
<p>Note that we need to add additional terms here following the HPO update!</p>

In [5]:
# Maps an individual identifier (e.g. "individual_1") to the list of HpTerm
# objects collected for that individual.
patient_d = defaultdict(list)

In [6]:
# Table column holding each individual's findings -> key used in patient_d.
column_to_patient_id = {
    "Individual 1": "individual_1",
    "Individual 2": "individual_2",
    "Individual 3": "individual_3",
    "Individual 4": "individual_4",
}

# Create the keys up front so the individuals keep a stable order in
# patient_d even if the first rows contain unassessed values.
for pat_id in column_to_patient_id.values():
    patient_d.setdefault(pat_id, [])

# Each row describes one clinical feature. A cell value of "yes" is recorded
# as an observed term and "no" as an excluded term.
for _, row in df.iterrows():
    label = row["Feature"]
    hpo_term = hpo_cr.get_term_from_label(label)
    for column, pat_id in column_to_patient_id.items():
        status = row[column]
        if status == "yes":
            hpt = HpTerm(hpo_id=hpo_term.id, label=hpo_term.label)
        elif status == "no":
            hpt = HpTerm(hpo_id=hpo_term.id, label=hpo_term.label, observed=False)
        else:
            # Any other value (e.g. "?") means the feature was not assessed.
            # Skip it; otherwise the term built for a previous row would be
            # appended again under the wrong feature.
            continue
        patient_d[pat_id].append(hpt)

<h2>Variant</h2>
<p>Sequencing of ZSWIM6 (RefSeq NM_020928.1) confirms the exon 14 c.3487C>T mutation, which leads to the amino acid substitution p.Arg1163Trp, in all four cases. Note that the variant is encoded below using transcript version NM_020928.2.</p>

In [7]:
# The recurrent variant, expressed in HGVS on the chosen ZSWIM6 transcript.
transcript = "NM_020928.2"
var_hgvs = "c.3487C>T"

# Encode the variant against the hg38 build and record it as heterozygous.
validator = VariantValidator(genome_build="hg38")
v = validator.encode_hgvs(
    hgvs=var_hgvs,
    custom_transcript=transcript,
)
v.set_heterozygous()


https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_020928.2%3Ac.3487C>T/NM_020928.2?content-type=application%2Fjson


In [8]:
# Diagnosis shared by every individual in this cohort.
disease_id = "OMIM:603671"
disease_label = "Acromelic frontonasal dysostosis"

# Build one Individual per entry in patient_d; each gets its own
# interpretation object derived from the shared variant `v`.
individual_list = []
for pat_id, hpo_list in patient_d.items():
    if hpo_list is None:
        print(f"Could not find hpo list for {pat_id}")
        continue
    if not hpo_list:
        # An individual without terms is still created, but flagged.
        print(f"warning, empty HPO list for {pat_id}")
    individual_list.append(
        Individual(
            individual_id=pat_id,
            hpo_terms=hpo_list,
            interpretation_list=[v.to_ga4gh_variant_interpretation()],
            disease_id=disease_id,
            disease_label=disease_label,
        )
    )

print(f"Created {len(individual_list)} individual objects")

Created 4 individual objects


In [9]:
# Sanity check: convert the first individual to a GA4GH phenopacket and print
# it as JSON so the encoded terms can be inspected by eye.
i1 = individual_list[0]
phenopacket1 = i1.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh())
json_string = MessageToJson(phenopacket1)
print(json_string)

{
  "id": "individual_1",
  "subject": {
    "id": "individual_1"
  },
  "phenotypicFeatures": [
    {
      "type": {
        "id": "HP:0032388",
        "label": "Periventricular nodular heterotopia"
      },
      "excluded": true
    },
    {
      "type": {
        "id": "HP:0002119",
        "label": "Ventriculomegaly"
      }
    },
    {
      "type": {
        "id": "HP:0002190",
        "label": "Choroid plexus cyst"
      }
    },
    {
      "type": {
        "id": "HP:0012520",
        "label": "Dilation of Virchow-Robin spaces"
      },
      "excluded": true
    },
    {
      "type": {
        "id": "HP:0001274",
        "label": "Agenesis of corpus callosum"
      },
      "excluded": true
    },
    {
      "type": {
        "id": "HP:0002079",
        "label": "Hypoplasia of the corpus callosum"
      }
    },
    {
      "type": {
        "id": "HP:0001331",
        "label": "Absent septum pellucidum"
      },
      "excluded": true
    },
    {
      "type": {
    

In [10]:
from IPython.display import HTML, display

# Convert every individual to a phenopacket and render the cohort as an
# HTML summary table.
ppacket_list = [
    individual.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh())
    for individual in individual_list
]
table = PhenopacketTable(phenopacket_list=ppacket_list)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
individual_1 (UNKNOWN; ),Acromelic frontonasal dysostosis (OMIM:603671),NM_020928.2:c.3487C>T (heterozygous),Ventriculomegaly (HP:0002119); Choroid plexus cyst (HP:0002190); Hypoplasia of the corpus callosum (HP:0002079); Calcification of falx cerebri (HP:0005462); Midline central nervous system lipomas (HP:0006866); Aplasia of the olfactory bulb (HP:0032466); Large sella turcica (HP:0002690); Glaucoma (HP:0000501); Hypertelorism (HP:0000316); Ptosis (HP:0000508); Telecanthus (HP:0000506); Midline facial cleft (HP:0100629); Bifid nose (HP:0011803); Cleft palate (HP:0000175); Parietal foramina (HP:0002697); Vertical clivus (HP:0010559); Preaxial foot polydactyly (HP:0001841); Short tibia (HP:0005736); Patellar hypoplasia (HP:0003065); Hypopituitarism (HP:0040075); Cryptorchidism (HP:0000028); Global developmental delay (HP:0001263)
individual_2 (UNKNOWN; ),Acromelic frontonasal dysostosis (OMIM:603671),NM_020928.2:c.3487C>T (heterozygous),Periventricular nodular heterotopia (HP:0032388); Choroid plexus cyst (HP:0002190); Dilation of Virchow-Robin spaces (HP:0012520); Agenesis of corpus callosum (HP:0001274); Calcification of falx cerebri (HP:0005462); Midline central nervous system lipomas (HP:0006866); Aplasia of the olfactory bulb (HP:0032466); Myopia (HP:0000545); Remnants of the hyaloid vascular system (HP:0007968); Hypertelorism (HP:0000316); Ptosis (HP:0000508); Telecanthus (HP:0000506); Midline facial cleft (HP:0100629); Bifid nose (HP:0011803); Submucous cleft soft palate (HP:0011819); Parietal foramina (HP:0002697); Vertical clivus (HP:0010559); Preaxial foot polydactyly (HP:0001841); Short tibia (HP:0005736); Patellar hypoplasia (HP:0003065); Hypopituitarism (HP:0040075); Global developmental delay (HP:0001263)
individual_3 (UNKNOWN; ),Acromelic frontonasal dysostosis (OMIM:603671),NM_020928.2:c.3487C>T (heterozygous),Absent septum pellucidum (HP:0001331); Calcification of falx cerebri (HP:0005462); Midline central nervous system lipomas (HP:0006866); Large sella turcica (HP:0002690); Myopia (HP:0000545); Optic Nerve Hypoplasia (HP:0000609); Hypertelorism (HP:0000316); Ptosis (HP:0000508); Telecanthus (HP:0000506); Midline facial cleft (HP:0100629); Bifid nose (HP:0011803); Parietal foramina (HP:0002697); Vertical clivus (HP:0010559); Preaxial foot polydactyly (HP:0001841); Short tibia (HP:0005736); Patellar hypoplasia (HP:0003065); Global developmental delay (HP:0001263)
individual_4 (UNKNOWN; ),Acromelic frontonasal dysostosis (OMIM:603671),NM_020928.2:c.3487C>T (heterozygous),Ventriculomegaly (HP:0002119); Midline central nervous system lipomas (HP:0006866); Large sella turcica (HP:0002690); Retrocerebellar cyst (HP:0006951); Hypertelorism (HP:0000316); Ptosis (HP:0000508); Telecanthus (HP:0000506); Midline facial cleft (HP:0100629); Bifid nose (HP:0011803); Vertical clivus (HP:0010559); Preaxial foot polydactyly (HP:0001841); Global developmental delay (HP:0001263)


In [11]:
# Export all individuals as GA4GH phenopackets; the recorded output reports
# that the files are written to the "phenopackets" directory.
Individual.output_individuals_as_phenopackets(individual_list=individual_list,
                                              pmid=pmid,
                                              metadata=metadata.to_ga4gh())

We output 4 GA4GH phenopackets to the directory phenopackets
