<H1>Stiff Skin Syndrome, Loeys et al. (2010)</H1>
<p>This notebook creates GA4GH Phenopackets from the clinical data in <a href="https://pubmed.ncbi.nlm.nih.gov/20375004/" target="__blank">Loeys BL, et al., Mutations in fibrillin-1 cause congenital scleroderma: stiff skin syndrome. Sci Transl Med. 2010 Mar 17;2(23):23ra20. PMID:20375004</a>.</p>

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import display, HTML
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.3


In [2]:
PMID = "PMID:20375004"
title = "Mutations in fibrillin-1 cause congenital scleroderma: stiff skin syndrome"
cite = Citation(pmid=PMID, title=title)
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199", citation=cite)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2023-10-09


In [3]:
df = pd.read_table('input/stiffskin_loeys2010.tsv')

In [4]:
df

Unnamed: 0,Identifier,1-II:1,1-III:2,2-III:1,2-IV:2,3-I:1,3-II:2,3-II:3,4-II:1
0,Age(yrs),51,25,70,37,54,22,21,54
1,Sex,M,M,M,F,F,F,F,M
2,Height(cm),173,165,163,155,145,160,159,169
3,Stiff skin,++,+++,+++,+++,+++,+++,+++,+
4,Surgery,"Appendectomy, Herniorraphy, Coronaryarterybypass",Cesarean section,Gastric ulcer surgery,Cesarean Sections,Hysterectomy,Achilles tendon lengthening,Achillestendonlengthening,.
5,Ectopia Lentis,─,─,─*,─,─,─,─,─
6,Cataract,─,─,+,─,+,─,─,─
7,Myopia,+,+,─,─,+,+,+,+
8,Arachnodactyly,─,─,─,─,─,─,─,─
9,Camptodactyly,++,++,++,++,++,++,++,++


In [5]:
# convert to row-based format
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
dft['patient_id'] = dft.index
dft.head()

Identifier,Age(yrs),Sex,Height(cm),Stiff skin,Surgery,Ectopia Lentis,Cataract,Myopia,Arachnodactyly,Camptodactyly,Elbowcontractures,Limitedshoulderelevation,Kneecontractures,MCP/IPnodules,Aorta,Neuropathy,GER,Variant,patient_id
1-II:1,51,M,173,++,"Appendectomy, Herniorraphy, Coronaryarterybypass",─,─,+,─,++,++,─,++,++,nl,─,+,c.4710G>C,1-II:1
1-III:2,25,M,165,+++,Cesarean section,─,─,+,─,++,++,++,++,++,nl,+,+,c.4710G>C,1-III:2
2-III:1,70,M,163,+++,Gastric ulcer surgery,─*,+,─,─,++,++,++,++,++,nl,─,+,c.4710G>T,2-III:1
2-IV:2,37,F,155,+++,Cesarean Sections,─,─,─,─,++,++,++,++,++,nl,─,+,c.4710G>T,2-IV:2
3-I:1,54,F,145,+++,Hysterectomy,─,+,+,─,++,++,++,++,+,nl,─,─,c.4691G>C,3-I:1


<h2>Create mappers to convert data</h2>

In [6]:
column_mapper_d = {}

In [7]:
symbol_d = {'+': 'Stiff skin', '++': 'Stiff skin', '+++': 'Stiff skin'}
stiffSkinMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=symbol_d)
stiffSkinMapper.preview_column(dft['Stiff skin'])
column_mapper_d['Stiff skin'] = stiffSkinMapper

In [8]:
items = {
  'Ectopia Lentis': ["Ectopia lentis","HP:0001083"],
  'Cataract': ['Cataract', 'HP:0000518'],
  'Myopia': ["Myopia", "HP:0000545"],
  'Arachnodactyly': ['Arachnodactyly', 'HP:0001166'],
  'Camptodactyly': ['Camptodactyly', 'HP:0012385'],
  'Elbowcontractures': ['Elbow contracture', 'HP:0034391'],
  'Limitedshoulderelevation': ['Limited shoulder flexion','HP:0033482'],
  'Neuropathy': ["Entrapment neuropathy",'HP:0012181'],
  'GER': ['Gastroesophageal reflux', 'HP:0002020']
}

item_column_mapper_d = hpo_cr.initialize_simple_column_maps(column_name_to_hpo_label_map=items, 
                                                            observed={'+', '++'},
                                                              excluded='-')

# Transfer to column_mapper_d
for k, v in item_column_mapper_d.items():
    column_mapper_d[k] = v

In [9]:
# Aorta
# Bicuspid aortic valve HP:0001647
mapper = SimpleColumnMapper(hpo_id='HP:0001647',
    hpo_label='Bicuspid aortic valve',
    observed='BAV',
    excluded='─')
mapper.preview_column(dft['Aorta'])
column_mapper_d['Aorta'] = mapper

<h2>Demographic columns</h2>

In [10]:
ageMapper = AgeColumnMapper.by_year('Age(yrs)')
#ageMapper.preview_column(dft['Age(yrs)'])

In [11]:
sexMapper = SexColumnMapper(male_symbol='M', female_symbol='F', column_name='Sex')
#sexMapper.preview_column(dft['Sex'])

<h2>Variant data</H2>

In [12]:
fbn1_transcript='NM_000138.5' 
vvalidator = VariantValidator(genome_build="hg38", transcript=fbn1_transcript)
variant_d = {}
for v in dft['Variant'].unique():
    var = vvalidator.encode_hgvs(v)
    variant_d[v] = var
print(f"Extracted {len(variant_d)} distinct variants")


varMapper = VariantColumnMapper(variant_d=variant_d,
                                variant_column_name='Variant', 
                                default_genotype='heterozygous')

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4729T>G/NM_000138.5?content-type=application%2Fjson
Extracted 4 distinct variants


In [13]:
pmid = "PMID:20375004"
encoder = CohortEncoder(df=dft, 
                        hpo_cr=hpo_cr, 
                        column_mapper_d=column_mapper_d, 
                        individual_column_name="patient_id", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        metadata=metadata,
                        variant_mapper=varMapper)
sss = Disease(disease_id='OMIM:184900', disease_label='Stiff skin syndrome')
encoder.set_disease(sss)
individuals = encoder.get_individuals()

In [14]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_html()))

ID,Level,Category,Message,HPO Term
PMID_20375004_1-II:1,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_1-III:2,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_2-III:1,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_2-IV:2,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_3-I:1,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_3-II:2,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_3-II:3,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_4-II:1,INFORMATION,NOT_MEASURED,Arachnodactyly (HP:0001166) was listed as not measured and will be omitted,not measured: Arachnodactyly (HP:0001166)
PMID_20375004_1-II:1,INFORMATION,NOT_MEASURED,Bicuspid aortic valve (HP:0001647) was listed as not measured and will be omitted,not measured: Bicuspid aortic valve (HP:0001647)
PMID_20375004_1-III:2,INFORMATION,NOT_MEASURED,Bicuspid aortic valve (HP:0001647) was listed as not measured and will be omitted,not measured: Bicuspid aortic valve (HP:0001647)


In [15]:
individuals = cvalidator.get_error_free_individual_list()
phenopackets = [i.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh()) for i in individuals]
table = PhenopacketTable(phenopacket_list=phenopackets)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
1-II:1 (MALE; P51Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4710G>C (heterozygous),Stiff skin (HP:0030053); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Gastroesophageal reflux (HP:0002020)
1-III:2 (MALE; P25Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4710G>C (heterozygous),Stiff skin (HP:0030053); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482); Entrapment neuropathy (HP:0012181); Gastroesophageal reflux (HP:0002020)
2-III:1 (MALE; P70Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4710G>T (heterozygous),Stiff skin (HP:0030053); Cataract (HP:0000518); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482); Gastroesophageal reflux (HP:0002020)
2-IV:2 (FEMALE; P37Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4710G>T (heterozygous),Stiff skin (HP:0030053); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482); Gastroesophageal reflux (HP:0002020)
3-I:1 (FEMALE; P54Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4691G>C (heterozygous),Stiff skin (HP:0030053); Cataract (HP:0000518); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482)
3-II:2 (FEMALE; P22Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4691G>C (heterozygous),Stiff skin (HP:0030053); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482); Bicuspid aortic valve (HP:0001647)
3-II:3 (FEMALE; P21Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4691G>C (heterozygous),Stiff skin (HP:0030053); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482)
4-II:1 (MALE; P54Y),Stiff skin syndrome (OMIM:184900),NM_000138.5:c.4729T>G (heterozygous),Stiff skin (HP:0030053); Myopia (HP:0000545); Camptodactyly (HP:0012385); Elbow contracture (HP:0034391); Limited shoulder flexion (HP:0033482); Entrapment neuropathy (HP:0012181)


In [16]:
output_directory = "phenopackets"
Individual.output_individuals_as_phenopackets(individual_list=individuals,
                                              metadata=metadata,
                                              outdir=output_directory)

We output 8 GA4GH phenopackets to the directory phenopackets
