<H1>FBN1: Marfan syndrome (Katzke, 2002)</H1>
<p>Extract phenopackets from the clinical data in <a href="https://pubmed.ncbi.nlm.nih.gov/12203992/" target="__blank">Katzke et al (2002)</a>.</p>

In [59]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
from pyphetools.creation import *
# last tested with pyphetools version 0.2.20

In [60]:
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)

In [79]:
df = pd.read_table("input/katzke_2002.tsv")
df.set_index('Patient', inplace=True)

In [80]:
df

Unnamed: 0_level_0,Exon,Gent,Age,Gender,Skeletal,Ocular,Cardiovascular,Other,FH,Cosegregation,HGVS,Protein
Patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
D15,2,y,25,M,14815,EL,ARD,St,+,,c.184C>T,R62C
D55,2,n,31,M,-,"EL,RD",n,*,-,+,c.184C>T,R62C
B46,2,s,12,M,,,,-,-,,c.184C>T,R62C
D10,3,n,16,F,71415,EL,n,n,-,+,c.344C>G,S115C
B3,14,e,15,F,146,"EL,M",ARD,-,-,+,c.1760G>A,C587Y
B1,14,y,8,M,4578121415,"EL,M","ARD,MVP",n,+,+,c.1787G>A,C596Y
D26,15,y,50,F,8,FC,"ARD,AR-dis",St,+,+,c.1960G>A,D654N
B9,16,y,12,M,14810,EL,ARD,*,+,,c.2055C>G,C685W
B19,16,y,3,M,4561112,EL,ARD,^,^,,c.2055C>G,C685W
D59,16,e,16,F,1461112,"EL,M",,^,^,+,c.2042C>A,S681Y


In [81]:
column_mapper_d = defaultdict(ColumnMapper)

In [82]:
def get_skeletal_items(skel):
    items = str(skel).split(",")
    d = {
        "1": "Tall stature",
        "2": "Pectus carinatum",
        "3": "Pectus excavatum",
        "4": "Arachnodactyly",
        "5": "Dolichostenomelia", # US/LSo0.86 or arm span to height
        "6": "Scoliosis",
        "7": "Limited elbow extension",
        "8": "Pes planus",
        "9": "Protrusio acetabuli",
        "10": "Pectus excavatum",
        "11": "Joint hypermobility",
        "12": "High palate",
        "13": "Malar flattening", #  typical facial appearance in the original publication
        "14": "Dolichocephaly",
        "15": "Enophthalmus"
    }
    results = []
    for it in items:
        if it in d:
            results.append(d.get(it))
        elif it == "-" or it == "nan" or len(it) == 0:
            continue
        else:
            raise ValueError(f"Could not find \"{it}\" in list")
    return " ;".join(results)


for pat_id in df.index:
    skeletal = df.at[pat_id, "Skeletal"]
    df.at[pat_id, "Skeletal"] = get_skeletal_items(skel=skeletal)
    #print(df.at[pat_id, "Skeletal"])

In [83]:
df

Unnamed: 0_level_0,Exon,Gent,Age,Gender,Skeletal,Ocular,Cardiovascular,Other,FH,Cosegregation,HGVS,Protein
Patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
D15,2,y,25,M,Tall stature ;Arachnodactyly ;Pes planus ;Enophthalmus,EL,ARD,St,+,,c.184C>T,R62C
D55,2,n,31,M,,"EL,RD",n,*,-,+,c.184C>T,R62C
B46,2,s,12,M,,,,-,-,,c.184C>T,R62C
D10,3,n,16,F,Limited elbow extension ;Dolichocephaly ;Enophthalmus,EL,n,n,-,+,c.344C>G,S115C
B3,14,e,15,F,Tall stature ;Arachnodactyly ;Scoliosis,"EL,M",ARD,-,-,+,c.1760G>A,C587Y
B1,14,y,8,M,Arachnodactyly ;Dolichostenomelia ;Limited elbow extension ;Pes planus ;High palate ;Dolichocephaly ;Enophthalmus,"EL,M","ARD,MVP",n,+,+,c.1787G>A,C596Y
D26,15,y,50,F,Pes planus,FC,"ARD,AR-dis",St,+,+,c.1960G>A,D654N
B9,16,y,12,M,Tall stature ;Arachnodactyly ;Pes planus ;Pectus excavatum,EL,ARD,*,+,,c.2055C>G,C685W
B19,16,y,3,M,Arachnodactyly ;Dolichostenomelia ;Scoliosis ;Joint hypermobility ;High palate,EL,ARD,^,^,,c.2055C>G,C685W
D59,16,e,16,F,Tall stature ;Arachnodactyly ;Scoliosis ;Joint hypermobility ;High palate,"EL,M",,^,^,+,c.2042C>A,S681Y


In [84]:
skelMapper = CustomColumnMapper(concept_recognizer=hpo_cr)
#skelMapper.preview_column(df['Skeletal'])
column_mapper_d['Skeletal'] = skelMapper

In [85]:
ophth_d ={
    "EL": "Ectopia lentis",
    "M": "Myopia",
    "RD": "Retinal detachment",
    "FC": "Flat cornea"
}
ophthMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=ophth_d)
ophthMapper.preview_column(df['Ocular'])
column_mapper_d['Ocular'] = ophthMapper

In [87]:
cv_d ={
    "ARD": "Aortic root aneurysm",
    "MVP":"Mitral valve prolapse",
    'AR-dis':"Ascending aortic dissection",
    'MVP':"Mitral valve prolapse",
    "MR2*": "Mitral regurgitation",
}
cvMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=cv_d)
cvMapper.preview_column(df['Cardiovascular'])
column_mapper_d['Cardiovascular'] = cvMapper

In [88]:
other_d = {
    "St": "Striae atrophicae",
    "H": "Inguinal hernia",
    "colob": "Lens coloboma"
}
otherMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_d)
otherMapper.preview_column(df['Other'])
column_mapper_d['Other'] = otherMapper

In [89]:
transcript = "NM_000138.5"

genome = 'hg38'
transcript='NM_000138.5' # FBN1
varMapper = VariantColumnMapper(assembly=genome,
                                column_name='HGVS', 
                                transcript=transcript, 
                                default_genotype='heterozygous')

In [90]:
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", column_name="Gender")
ageMapper = AgeColumnMapper.by_year("Age")
pmid = "PMID:12203992"
encoder = CohortEncoder(df=df, 
                        hpo_cr=hpo_cr, 
                        column_mapper_d=column_mapper_d, 
                        individual_column_name="Patient", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        variant_mapper=varMapper,
                        metadata=metadata,
                        pmid=pmid)
omim_id = "OMIM:154700"
omim_label = "Marfan syndrome"
encoder.set_disease(disease_id=omim_id, label=omim_label)

In [91]:
output_directory = "phenopackets"
encoder.output_phenopackets(outdir=output_directory)

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.184C>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.184C>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.184C>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.344C>G/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.1760G>A/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.1787G>A/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.1960G>A/NM_000138.5?content-type=application%2Fjson
https://rest.vari

In [93]:
%cat phenopackets/PMID_12203992_D15.json

{
  "id": "D15",
  "subject": {
    "id": "D15",
    "timeAtLastEncounter": {
      "age": {
        "iso8601duration": "P25Y"
      }
    },
    "sex": "MALE"
  },
  "phenotypicFeatures": [
    {
      "type": {
        "id": "HP:0000098",
        "label": "Tall stature"
      },
      "onset": {
        "age": {
          "iso8601duration": "P25Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001166",
        "label": "Arachnodactyly"
      },
      "onset": {
        "age": {
          "iso8601duration": "P25Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001763",
        "label": "Pes planus"
      },
      "onset": {
        "age": {
          "iso8601duration": "P25Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001083",
        "label": "Ectopia lentis"
      },
      "onset": {
        "age": {
          "iso8601duration": "P25Y"
        }
      }
    },
    {
