<H1>FBN1: Marfan syndrome (Palz, 2000)</H1>
<p>Extract phenopackets from the clinical data in <a href="https://pubmed.ncbi.nlm.nih.gov/10756346/" target="__blank">Palz et al (2000)</a>.</p>

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import display, HTML
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.36


In [2]:
PMID = "PMID:10756346"
title = "Clustering of mutations associated with mild Marfan-like phenotypes in the 3' region of FBN1 suggests a potential genotype-phenotype correlation"
cite = Citation(pmid=PMID, title=title)

hpo_version = '2024-01-16'
hpo_url = f'https://github.com/obophenotype/human-phenotype-ontology/releases/download/v{hpo_version}/hp.json'
parser = HpoParser(hpo_json_file=hpo_url)
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199", citation=cite)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2024-01-16


In [3]:
df = pd.read_table("input/palz_2000.tsv")
df

Unnamed: 0,Feature/Finding,B13,B32,B55,B73,D22,D46
0,AgeAtLastExamination(years),13,15,40,21,10,24
1,Inheritance,fam,spor,,spor,fam,spor
2,HeightOver98P,−,+,−,+,+,+
3,PectusCarinatum,+,+,−,−,−,−
4,ReducedUSLSOrIncreasedSpanheightRatio,+,+,−,−,,
5,WristAndThumbSign,+/−,+,+/−,−,+,+
6,Scoliosis,−,−,−,+,+,+
7,HyperextensibleJoints,+,−,−,−,+,+
8,Other,-,hern,hern,-,-,p_ex
9,EctopiaLentis,+,−,+,−,−,+


In [4]:
# convert to row-based format
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
#dft=dft.rename(columns = {'Feature/Finding':'Patient'})
#dft['patient_id'] = dft.index
dft['patient_id'] = dft.index
dft.head()

Feature/Finding,AgeAtLastExamination(years),Inheritance,HeightOver98P,PectusCarinatum,ReducedUSLSOrIncreasedSpanheightRatio,WristAndThumbSign,Scoliosis,HyperextensibleJoints,Other,EctopiaLentis,DilatationOfTheAscendingAorta,MitralValveProlapse,UnusualFeatures,patient_id
B13,13,fam,−,+,+,+/−,−,+,-,+,+,+,−,B13
B32,15,spor,+,+,+,+,−,−,hern,−,+,−,−,B32
B55,40,,−,−,−,+/−,−,−,hern,+,desc,−,+,B55
B73,21,spor,+,−,−,−,+,−,-,−,−,−,+,B73
D22,10,fam,+,−,,+,+,+,-,−,+,−,−,D22


In [5]:
#Note that because Disproportionate tall stature is a child of Tall stature
#we remove Tall stature to avoid redundant annotations from records that have both terms
dft.at['B32', 'HeightOver98P'] = 'NaN' # this removes the observation. B32 also has disproportionate TS
dft.at['B32', 'Other'] = 'Inguinal hernia'
dft.at['B55', 'Other'] = """Inguinal hernia. Descending aortic dissection. High palate. Striae atrophicae. 
            Dolichocephaly. Enophthalmos. Retrognathia"""
dft.at['D46', 'Other'] = 'Pectus excavatum'
dft

Feature/Finding,AgeAtLastExamination(years),Inheritance,HeightOver98P,PectusCarinatum,ReducedUSLSOrIncreasedSpanheightRatio,WristAndThumbSign,Scoliosis,HyperextensibleJoints,Other,EctopiaLentis,DilatationOfTheAscendingAorta,MitralValveProlapse,UnusualFeatures,patient_id
B13,13,fam,−,+,+,+/−,−,+,-,+,+,+,−,B13
B32,15,spor,,+,+,+,−,−,Inguinal hernia,−,+,−,−,B32
B55,40,,−,−,−,+/−,−,−,Inguinal hernia. Descending aortic dissection. High palate. Striae atrophicae. \n Dolichocephaly. Enophthalmos. Retrognathia,+,desc,−,+,B55
B73,21,spor,+,−,−,−,+,−,-,−,−,−,+,B73
D22,10,fam,+,−,,+,+,+,-,−,+,−,−,D22
D46,24,spor,+,−,,+,+,+,Pectus excavatum,+,−,+,+,D46


In [6]:
column_mapper_list = list()
items = {
      'HeightOver98P': ['Tall stature', 'HP:0000098'],
      'PectusCarinatum': ["Pectus carinatum","HP:0000768"],
      'ReducedUSLSOrIncreasedSpanheightRatio': ['Disproportionate tall stature', 'HP:0001519'],
      'WristAndThumbSign': ['Arachnodactyly', 'HP:0001166'],
      'Scoliosis': ['Scoliosis', 'HP:0002650'],
      'HyperextensibleJoints': ['Joint hypermobility','HP:0001382'],
      'EctopiaLentis': ["Ectopia lentis", "HP:0001083"],
      'DilatationOfTheAscendingAorta': ["Aortic root aneurysm", "HP:0002616"],
      'MitralValveProlapse': ['Mitral valve prolapse', 'HP:0001634'],
}

item_column_mapper_d = hpo_cr.initialize_simple_column_maps(column_name_to_hpo_label_map=items, observed='+',
  excluded='-')

# Transfer to column_mapper_d

for k, v in item_column_mapper_d.items():
    column_mapper_list.append(v)

In [7]:
otherMapper = OptionColumnMapper(column_name='Other',concept_recognizer=hpo_cr, option_d={})
column_mapper_list.append(otherMapper)
otherMapper.preview_column(dft)

Unnamed: 0,mapping,count
0,Inguinal hernia (HP:0000023) (observed),2
1,Retrognathia (HP:0000278) (observed),1
2,Dolichocephaly (HP:0000268) (observed),1
3,Deeply set eye (HP:0000490) (observed),1
4,Striae distensae (HP:0001065) (observed),1
5,High palate (HP:0000218) (observed),1
6,Descending aortic dissection (HP:0012499) (observed),1
7,Pectus excavatum (HP:0000767) (observed),1


<p>The variants in the original publication were given in non-HGVS syntax. Here we add the variants using correct syntax to the dataframe.</p>

In [8]:
# B13 G7565C C2522S  -- c.7565G>C
# B55  -- c.7331-2A>G
# D22 C7605A C2535X  -- c.7605C>A
# B73  A7871G N2624S -- c.7871A>G
# D46  C8038T R2680C -- c.8038C>T
# B32  c.8525_8529del

fbn1_transcript='NM_000138.5'
variants_map ={
    "B13": "c.7565G>C",
    "B55": "c.7331-2A>G",
    "D22": "c.7605C>A",
    "B73": "c.7871A>G",
    "D46": "c.8038C>T",
    "B32": "c.8525_8529del"
}

dft["Variants"] = variants_map
vvalidator = VariantValidator(genome_build="hg38", transcript=fbn1_transcript)
variant_d = {}
for v in variants_map.values():
    var = vvalidator.encode_hgvs(v)
    variant_d[v] = var
varMapper = VariantColumnMapper(variant_d=variant_d,
                                variant_column_name='Variants', 
                                default_genotype='heterozygous')

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7565G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7331-2A>G/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7605C>A/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.7871A>G/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.8038C>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.8525_8529del/NM_000138.5?content-type=application%2Fjson


In [9]:
sex_d = {"B13": "M", 
        "B32": "M",
        "B55": "M",
        "B73": "F",
        "D22": "M",
        "D46": "M"}
dft["Sex"] = sex_d
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", column_name="Sex")

In [10]:
pmid = "PMID:10756346"
ageMapper = AgeColumnMapper.by_year("AgeAtLastExamination(years)")
encoder = CohortEncoder(df=dft, 
                        hpo_cr=hpo_cr, 
                        column_mapper_list=column_mapper_list, 
                        individual_column_name="patient_id", 
                        age_of_onset_mapper=ageMapper, 
                        sexmapper=sexMapper,
                        variant_mapper=varMapper,
                        metadata=metadata)
omim_id = "OMIM:154700"
omim_label = "Marfan syndrome"
mfs = Disease(disease_id=omim_id, disease_label=omim_label)
encoder.set_disease(disease=mfs)

In [11]:
individuals = encoder.get_individuals()

In [12]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

Level,Error category,Count
INFORMATION,NOT_MEASURED,30


In [13]:
individuals = cvalidator.get_error_free_individual_list()
phenopackets = [i.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh()) for i in individuals]
table = PhenopacketTable(phenopacket_list=phenopackets)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
B13 (MALE; P13Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.7565G>C (heterozygous),Pectus carinatum (HP:0000768); Disproportionate tall stature (HP:0001519); Joint hypermobility (HP:0001382); Ectopia lentis (HP:0001083); Aortic root aneurysm (HP:0002616); Mitral valve prolapse (HP:0001634)
B32 (MALE; P15Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.8525_8529del (heterozygous),Pectus carinatum (HP:0000768); Disproportionate tall stature (HP:0001519); Arachnodactyly (HP:0001166); Aortic root aneurysm (HP:0002616); Inguinal hernia (HP:0000023)
B55 (MALE; P40Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.7331-2A>G (heterozygous),Ectopia lentis (HP:0001083); High palate (HP:0000218); Retrognathia (HP:0000278); Descending aortic dissection (HP:0012499); Deeply set eye (HP:0000490); Striae distensae (HP:0001065); Dolichocephaly (HP:0000268); Inguinal hernia (HP:0000023)
B73 (FEMALE; P21Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.7871A>G (heterozygous),Tall stature (HP:0000098); Scoliosis (HP:0002650)
D22 (MALE; P10Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.7605C>A (heterozygous),Tall stature (HP:0000098); Arachnodactyly (HP:0001166); Scoliosis (HP:0002650); Joint hypermobility (HP:0001382); Aortic root aneurysm (HP:0002616)
D46 (MALE; P24Y),Marfan syndrome (OMIM:154700),NM_000138.5:c.8038C>T (heterozygous),Tall stature (HP:0000098); Arachnodactyly (HP:0001166); Scoliosis (HP:0002650); Joint hypermobility (HP:0001382); Ectopia lentis (HP:0001083); Mitral valve prolapse (HP:0001634); Pectus excavatum (HP:0000767)


In [14]:
output_directory = "phenopackets"
Individual.output_individuals_as_phenopackets(individual_list=individuals,
                                              metadata=metadata,
                                              outdir=output_directory)

We output 6 GA4GH phenopackets to the directory phenopackets
