<H1>FBN1: Marfan syndrome (Palz, 2000)</H1>
<p>Extract phenopackets from the clinical data in <a href="https://pubmed.ncbi.nlm.nih.gov/10756346/" target="__blank">Palz et al (2000)</a>.</p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
from pyphetools.creation import *
import importlib.metadata
__version__ = importlib.metadata.version("pyphetools")
print(f"Using pyphetools version {__version__}")

Using pyphetools version 0.4.13


In [2]:
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)

In [3]:
df = pd.read_table("input/palz_2000.tsv")
df

Unnamed: 0,Feature/Finding,B13,B32,B55,B73,D22,D46
0,AgeAtLastExamination(years),13,15,40,21,10,24
1,Inheritance,fam,spor,,spor,fam,spor
2,HeightOver98P,−,+,−,+,+,+
3,PectusCarinatum,+,+,−,−,−,−
4,ReducedUSLSOrIncreasedSpanheightRatio,+,+,−,−,,
5,WristAndThumbSign,+/−,+,+/−,−,+,+
6,Scoliosis,−,−,−,+,+,+
7,HyperextensibleJoints,+,−,−,−,+,+
8,Other,-,hern,hern,-,-,p_ex
9,EctopiaLentis,+,−,+,−,−,+


In [4]:
# convert to row-based format
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
#dft=dft.rename(columns = {'Feature/Finding':'Patient'})
#dft['patient_id'] = dft.index
dft['patient_id'] = dft.index
dft.head()

Feature/Finding,AgeAtLastExamination(years),Inheritance,HeightOver98P,PectusCarinatum,ReducedUSLSOrIncreasedSpanheightRatio,WristAndThumbSign,Scoliosis,HyperextensibleJoints,Other,EctopiaLentis,DilatationOfTheAscendingAorta,MitralValveProlapse,UnusualFeatures,patient_id
B13,13,fam,−,+,+,+/−,−,+,-,+,+,+,−,B13
B32,15,spor,+,+,+,+,−,−,hern,−,+,−,−,B32
B55,40,,−,−,−,+/−,−,−,hern,+,desc,−,+,B55
B73,21,spor,+,−,−,−,+,−,-,−,−,−,+,B73
D22,10,fam,+,−,,+,+,+,-,−,+,−,−,D22


In [5]:
#Note that because Disproportionate tall stature is a child of Tall stature
#we remove Tall stature to avoid redundant annotations from records that have both terms
dft.at['B32', 'HeightOver98P'] = 'NaN' # this removes the observation. B32 also has disproportionate TS
dft.at['B32', 'Other'] = 'Inguinal hernia'
dft.at['B55', 'Other'] = """Inguinal hernia. Descending aortic dissection. High palate. Striae atrophicae. 
            Dolichocephaly. Enophthalmos. Retrognathia"""
dft.at['D46', 'Other'] = 'Pectus excavatum'
dft

Feature/Finding,AgeAtLastExamination(years),Inheritance,HeightOver98P,PectusCarinatum,ReducedUSLSOrIncreasedSpanheightRatio,WristAndThumbSign,Scoliosis,HyperextensibleJoints,Other,EctopiaLentis,DilatationOfTheAscendingAorta,MitralValveProlapse,UnusualFeatures,patient_id
B13,13,fam,−,+,+,+/−,−,+,-,+,+,+,−,B13
B32,15,spor,,+,+,+,−,−,Inguinal hernia,−,+,−,−,B32
B55,40,,−,−,−,+/−,−,−,Inguinal hernia. Descending aortic dissection. High palate. Striae atrophicae. \n Dolichocephaly. Enophthalmos. Retrognathia,+,desc,−,+,B55
B73,21,spor,+,−,−,−,+,−,-,−,−,−,+,B73
D22,10,fam,+,−,,+,+,+,-,−,+,−,−,D22
D46,24,spor,+,−,,+,+,+,Pectus excavatum,+,−,+,+,D46


In [6]:
column_mapper_d = defaultdict(ColumnMapper)
items = {
      'HeightOver98P': ['Tall stature', 'HP:0000098'],
      'PectusCarinatum': ["Pectus carinatum","HP:0000768"],
      'ReducedUSLSOrIncreasedSpanheightRatio': ['Disproportionate tall stature', 'HP:0001519'],
      'WristAndThumbSign': ['Arachnodactyly', 'HP:0001166'],
      'Scoliosis': ['Scoliosis', 'HP:0002650'],
      'HyperextensibleJoints': ['Joint hypermobility','HP:0001382'],
      'EctopiaLentis': ["Ectopia lentis", "HP:0001083"],
      'DilatationOfTheAscendingAorta': ["Aortic root aneurysm", "HP:0002616"],
      'MitralValveProlapse': ['Mitral valve prolapse', 'HP:0001634'],
}

item_column_mapper_d = hpo_cr.initialize_simple_column_maps(column_name_to_hpo_label_map=items, observed='+',
  excluded='-')

# Transfer to column_mapper_d

for k, v in item_column_mapper_d.items():
    column_mapper_d[k] = v

In [7]:
otherMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
otherMapper.preview_column(dft['Other'])
column_mapper_d['Other'] = otherMapper

<p>The variants in the original publication were given in non-HGVS syntax. Here we add the variants using correct syntax to the dataframe.</p>

In [8]:
#variants: 
transcript = "NM_000138.5"
# NM_000138.5


# B13 G7565C C2522S  -- c.7565G>C
# B55  -- c.7331-2A>G
# D22 C7605A C2535X  -- c.7605C>A
# B73  A7871G N2624S -- c.7871A>G
# D46  C8038T R2680C -- c.8038C>T
# B32  c.8525_8529del


genome = 'hg38'
transcript='NM_000138.5' # FBN1
varMapper = VariantColumnMapper(assembly=genome,
                                column_name='Variants', 
                                transcript=transcript, 
                                default_genotype='heterozygous')


variants_d ={
    "B13": "c.7565G>C",
    "B55": "c.7331-2A>G",
    "D22": "c.7605C>A",
    "B73": "c.7871A>G",
    "D46": "c.8038C>T",
    "B32": "c.8525_8529del"
}

dft["Variants"] = variants_d

In [9]:
sex_d = {"B13": "M", 
        "B32": "M",
        "B55": "M",
        "B73": "F",
        "D22": "M",
        "D46": "M"}
dft["Sex"] = sex_d
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", column_name="Sex")

In [10]:
pmid = "PMID:10756346"
ageMapper = AgeColumnMapper.by_year("AgeAtLastExamination(years)")
encoder = CohortEncoder(df=dft, 
                        hpo_cr=hpo_cr, 
                        column_mapper_d=column_mapper_d, 
                        individual_column_name="patient_id", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        variant_mapper=varMapper,
                        metadata=metadata,
                        pmid=pmid)
omim_id = "OMIM:154700"
omim_label = "Marfan syndrome"
encoder.set_disease(disease_id=omim_id, label=omim_label)

In [11]:
individuals = encoder.get_individuals()

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7565G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.8525_8529del/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7331-2A>G/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7871A>G/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7605C>A/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.8038C>T/NM_000138.5?content-type=application%2Fjson


In [12]:
i1 = individuals[0]
phenopacket1 = i1.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh())
json_string = MessageToJson(phenopacket1)
print(json_string)

{
  "id": "PMID_10756346_individual_B13",
  "subject": {
    "id": "B13",
    "timeAtLastEncounter": {
      "age": {
        "iso8601duration": "P13Y"
      }
    },
    "sex": "MALE"
  },
  "phenotypicFeatures": [
    {
      "type": {
        "id": "HP:0000768",
        "label": "Pectus carinatum"
      }
    },
    {
      "type": {
        "id": "HP:0001519",
        "label": "Disproportionate tall stature"
      }
    },
    {
      "type": {
        "id": "HP:0001382",
        "label": "Joint hypermobility"
      }
    },
    {
      "type": {
        "id": "HP:0001083",
        "label": "Ectopia lentis"
      }
    },
    {
      "type": {
        "id": "HP:0002616",
        "label": "Aortic root aneurysm"
      }
    },
    {
      "type": {
        "id": "HP:0001634",
        "label": "Mitral valve prolapse"
      }
    }
  ],
  "interpretations": [
    {
      "id": "B13",
      "progressStatus": "SOLVED",
      "diagnosis": {
        "disease": {
          "id": "OMIM:15470

In [14]:
output_directory = "phenopackets"
Individual.output_individuals_as_phenopackets(individual_list=individuals,
                                              metadata=metadata.to_ga4gh(),
                                              pmid=pmid,
                                              outdir=output_directory)

We output 6 GA4GH phenopackets to the directory phenopackets
