<H1>Stiff Skin Syndrome, Loeys et al. (2010)</H1>
<p>This notebook creates GA4GH Phenopackets from the clinical data in <a href="https://pubmed.ncbi.nlm.nih.gov/20375004/" target="_blank">Loeys BL, et al., Mutations in fibrillin-1 cause congenital scleroderma: stiff skin syndrome. Sci Transl Med. 2010 Mar 17;2(23):23ra20. PMID:20375004</a>.</p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
import os
import sys

from pyphetools.creation import *

In [2]:
# Set up the HPO resources used by every mapper below.
# hpo_cr is the concept recognizer handed to OptionColumnMapper and CohortEncoder;
# hpo_version is the ontology version reported by the parser.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
# Phenopacket metadata: created_by is the curator's ORCID, and the HPO version
# obtained above is passed on so the metadata records which release was used.
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)

In [5]:
# Load the tab-separated clinical table transcribed from the publication.
# The path is relative to the notebook's working directory.
df = pd.read_csv('input/stiffskin_loeys2010.tsv', sep='\t')

In [6]:
# Show the raw table as loaded: one column per patient, one row per clinical item.
df


Unnamed: 0,Identifier,1-II:1,1-III:2,2-III:1,2-IV:2,3-I:1,3-II:2,3-II:3,4-II:1
0,Age(yrs),51,25,70,37,54,22,21,54
1,Sex,M,M,M,F,F,F,F,M
2,Height(cm),173,165,163,155,145,160,159,169
3,Stiff skin,++,+++,+++,+++,+++,+++,+++,+
4,Surgery,"Appendectomy, Herniorraphy, Coronaryarterybypass",Cesarean section,Gastric ulcer surgery,Cesarean Sections,Hysterectomy,Achilles tendon lengthening,Achillestendonlengthening,.
5,,,,,,,,,
6,Ectopia Lentis,─,─,─*,─,─,─,─,─
7,Cataract,─,─,+,─,+,─,─,─
8,Myopia,+,+,─,─,+,+,+,+
9,Arachnodactyly,─,─,─,─,─,─,─,─


In [9]:
# Reshape to one row per patient: after transposing, the first row holds the
# clinical item names ('Identifier' row), so it becomes the column header and is
# then removed from the data.
dft = df.T
dft.columns = dft.iloc[0]
# Drop the header row and expose the patient identifier (currently the index)
# as a regular column, because the encoder looks it up by column name.
dft = dft.drop(index=dft.index[0]).assign(patient_id=lambda frame: frame.index)
dft.head()

Identifier,Age(yrs),Sex,Height(cm),Stiff skin,Surgery,NaN,Ectopia Lentis,Cataract,Myopia,Arachnodactyly,Camptodactyly,Elbowcontractures,Limitedshoulderelevation,Kneecontractures,MCP/IPnodules,Aorta,Neuropathy,GER,Variant,patient_id
1-II:1,51,M,173,++,"Appendectomy, Herniorraphy, Coronaryarterybypass",,─,─,+,─,++,++,─,++,++,nl,─,+,c.4710G>C,1-II:1
1-III:2,25,M,165,+++,Cesarean section,,─,─,+,─,++,++,++,++,++,nl,+,+,c.4710G>C,1-III:2
2-III:1,70,M,163,+++,Gastric ulcer surgery,,─*,+,─,─,++,++,++,++,++,nl,─,+,c.4710G>T,2-III:1
2-IV:2,37,F,155,+++,Cesarean Sections,,─,─,─,─,++,++,++,++,++,nl,─,+,c.4710G>T,2-IV:2
3-I:1,54,F,145,+++,Hysterectomy,,─,+,+,─,++,++,++,++,+,nl,─,─,c.4691G>C,3-I:1


<h2>Create mappers to convert data</h2>

In [11]:
# Registry of column name -> mapper, filled in by the cells below and passed to
# CohortEncoder. As a defaultdict, looking up a missing key would call
# ColumnMapper() to create a default entry; the cells below only assign keys.
column_mapper_d = defaultdict(ColumnMapper)

In [14]:
# Stiff skin: the table grades severity as '+', '++' or '+++'. Every grade is
# mapped to the same text, 'Stiff skin', which is handed to OptionColumnMapper
# together with the HPO concept recognizer.
symbol_d = {'+': 'Stiff skin', '++': 'Stiff skin', '+++': 'Stiff skin'}
stiffSkinMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=symbol_d)
stiffSkinMapper.preview_column(dft['Stiff skin'])
# Register the mapper under its dataframe column name. The subscript target must
# be column_mapper_d; a bare "['Stiff skin'] = ..." is a SyntaxError and would
# leave the column unmapped.
column_mapper_d['Stiff skin'] = stiffSkinMapper

In [17]:
# Ectopia lentis (HP:0001083): '+' is recorded as observed, '─' as excluded.
# The label uses the HPO primary spelling ("Ectopia lentis") so that the id/label
# pair written to the phenopacket matches the ontology; the dataframe column is
# still named 'Ectopia Lentis'.
# NOTE(review): patient 2-III:1 has the footnoted value '─*', which equals neither
# symbol; how SimpleColumnMapper treats such a cell is not visible here - confirm.
elMapper = SimpleColumnMapper(hpo_id='HP:0001083',
    hpo_label='Ectopia lentis',
    observed='+',
    excluded='─')
elMapper.preview_column(dft['Ectopia Lentis'])
# Register under the dataframe column name.
column_mapper_d['Ectopia Lentis'] = elMapper

In [19]:
# Cataract (HP:0000518): a '+' cell is recorded as observed, a '─' cell as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0000518',
    hpo_label='Cataract',
    observed='+',
    excluded='─')
# Preview how the mapper interprets the column, then register it under the
# dataframe column name.
mapper.preview_column(dft['Cataract'])
column_mapper_d['Cataract'] = mapper

In [20]:
# Myopia (HP:0000545): a '+' cell is recorded as observed, a '─' cell as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0000545',
    hpo_label='Myopia',
    observed='+',
    excluded='─')
# Preview how the mapper interprets the column, then register it under the
# dataframe column name.
mapper.preview_column(dft['Myopia'])
column_mapper_d['Myopia'] = mapper

Unnamed: 0,term,status
0,Myopia (HP:0000545),observed
1,Myopia (HP:0000545),observed
2,Myopia (HP:0000545),excluded
3,Myopia (HP:0000545),excluded
4,Myopia (HP:0000545),observed
5,Myopia (HP:0000545),observed
6,Myopia (HP:0000545),observed
7,Myopia (HP:0000545),observed


In [21]:
# Arachnodactyly (HP:0001166): a '+' cell is recorded as observed, a '─' cell as
# excluded. In this cohort every patient has '─', so the term is excluded throughout.
mapper = SimpleColumnMapper(hpo_id='HP:0001166',
    hpo_label='Arachnodactyly',
    observed='+',
    excluded='─')
# Preview how the mapper interprets the column, then register it under the
# dataframe column name.
mapper.preview_column(dft['Arachnodactyly'])
column_mapper_d['Arachnodactyly'] = mapper

Unnamed: 0,term,status
0,Arachnodactyly (HP:0001166),excluded
1,Arachnodactyly (HP:0001166),excluded
2,Arachnodactyly (HP:0001166),excluded
3,Arachnodactyly (HP:0001166),excluded
4,Arachnodactyly (HP:0001166),excluded
5,Arachnodactyly (HP:0001166),excluded
6,Arachnodactyly (HP:0001166),excluded
7,Arachnodactyly (HP:0001166),excluded


In [24]:
# Camptodactyly (HP:0012385). Unlike the ocular columns, the observed symbol
# here is '++'; '─' is recorded as excluded.
# NOTE(review): a single '+' would match neither symbol - confirm that no row
# in this column uses it.
mapper = SimpleColumnMapper(hpo_id='HP:0012385',
    hpo_label='Camptodactyly',
    observed='++',
    excluded='─')
# Preview how the mapper interprets the column, then register it under the
# dataframe column name.
mapper.preview_column(dft['Camptodactyly'])
column_mapper_d['Camptodactyly'] = mapper

In [27]:
# Elbow contracture (HP:0034391), read from the 'Elbowcontractures' column.
# The observed symbol is '++'; '─' is recorded as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0034391',
    hpo_label='Elbow contracture',
    observed='++',
    excluded='─')
# The registry key is the dataframe column name, not the HPO label.
mapper.preview_column(dft['Elbowcontractures'])
column_mapper_d['Elbowcontractures'] = mapper

In [28]:
# Column 'Limitedshoulderelevation' is coded as Limited shoulder flexion (HP:0033482).
# Rationale for the term choice: shoulder flexion is the motion that moves the
# arms from a resting position by the side of the body to a position above the
# head, which corresponds to the "shoulder elevation" reported in the table.
# The observed symbol is '++'; '─' is recorded as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0033482',
    hpo_label='Limited shoulder flexion',
    observed='++',
    excluded='─')
# The registry key is the dataframe column name, not the HPO label.
mapper.preview_column(dft['Limitedshoulderelevation'])
column_mapper_d['Limitedshoulderelevation'] = mapper

Unnamed: 0,term,status
0,Limited shoulder flexion (HP:0033482),excluded
1,Limited shoulder flexion (HP:0033482),observed
2,Limited shoulder flexion (HP:0033482),observed
3,Limited shoulder flexion (HP:0033482),observed
4,Limited shoulder flexion (HP:0033482),observed
5,Limited shoulder flexion (HP:0033482),observed
6,Limited shoulder flexion (HP:0033482),observed
7,Limited shoulder flexion (HP:0033482),observed


In [None]:
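# Kneecontractures
# TODO: choose an HPO term and add a column mapper for the 'Kneecontractures' column; until then this column is not registered in column_mapper_d.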

In [29]:
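# MCP/IPnodules
# TODO: choose an HPO term and add a column mapper for the 'MCP/IPnodules' column; until then this column is not registered in column_mapper_d.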

In [32]:
# Column 'Aorta' is coded as Bicuspid aortic valve (HP:0001647):
# 'BAV' is recorded as observed and '─' as excluded.
# NOTE(review): the rows visible in the table preview all contain 'nl', which
# equals neither symbol; confirm how SimpleColumnMapper treats such cells and
# whether 'nl' (presumably "normal") should count as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0001647',
    hpo_label='Bicuspid aortic valve',
    observed='BAV',
    excluded='─')
# The registry key is the dataframe column name, not the HPO label.
mapper.preview_column(dft['Aorta'])
column_mapper_d['Aorta'] = mapper

In [36]:
# Column 'Neuropathy' refers to diffuse entrapment neuropathy (nerve injury and
# dysfunction because of local compression), so it is coded as
# Entrapment neuropathy (HP:0012181).
# A '+' cell is recorded as observed, a '─' cell as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0012181',
    hpo_label='Entrapment neuropathy',
    observed='+',
    excluded='─')
# The registry key is the dataframe column name, not the HPO label.
mapper.preview_column(dft['Neuropathy'])
column_mapper_d['Neuropathy'] = mapper

In [37]:
# Column 'GER' is coded as Gastroesophageal reflux (HP:0002020).
# A '+' cell is recorded as observed, a '─' cell as excluded.
mapper = SimpleColumnMapper(hpo_id='HP:0002020',
    hpo_label='Gastroesophageal reflux',
    observed='+',
    excluded='─')
# The registry key is the dataframe column name, not the HPO label.
mapper.preview_column(dft['GER'])
column_mapper_d['GER'] = mapper

<h2>Demographic columns</h2>

In [40]:
# Ages are read from the 'Age(yrs)' column, which holds whole years (e.g. 51);
# in the generated phenopacket this appears as an ISO 8601 duration such as P51Y.
ageMapper = AgeColumnMapper.by_year('Age(yrs)')
# Optional sanity check, left disabled:
#ageMapper.preview_column(dft['Age(yrs)'])

In [41]:
# Sex is read from the 'Sex' column, where 'M' denotes male and 'F' female.
sexMapper = SexColumnMapper(male_symbol='M', female_symbol='F', column_name='Sex')
# Optional sanity check, left disabled:
#sexMapper.preview_column(dft['Sex'])

<h2>Variant data</h2>

In [43]:
# Variants in the 'Variant' column are cDNA changes (e.g. c.4710G>C) given
# relative to the FBN1 transcript below; all are encoded with the same default
# genotype (heterozygous) on the hg38 assembly.
genome = 'hg38'
default_genotype = 'heterozygous'
transcript='NM_000138.5' # FBN1
varMapper = VariantColumnMapper(assembly=genome,column_name='Variant', 
                                transcript=transcript, genotype=default_genotype)
# Optional preview, left disabled. The recorded output shows that resolving
# variants issues requests to the VariantValidator REST API, so it needs
# network access.
#varMapper.preview_column(dft['Variant'])

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.

Unnamed: 0,variant
0,chr15:48467975C>G
1,chr15:48467975C>G
2,chr15:48467975C>A
3,chr15:48467975C>A
4,chr15:48467994C>G
5,chr15:48467994C>G
6,chr15:48467994C>G
7,chr15:48467956A>C


In [48]:
# Combine the patient table with all mappers defined above into one encoder.
# pmid identifies the source publication (Loeys et al., 2010).
pmid = "PMID:20375004"
# individual_column_name points at the 'patient_id' column added when the table
# was transposed; column_mapper_d holds the phenotype mappers keyed by column name.
encoder = CohortEncoder(df=dft, hpo_cr=hpo_cr, column_mapper_d=column_mapper_d, 
                        individual_column_name="patient_id", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        metadata=metadata,
                        variant_mapper=varMapper,
                        pmid=pmid)
# The same disease is set once on the encoder rather than per patient.
encoder.set_disease(disease_id='OMIM:184900', label='Stiff skin syndrome')

In [49]:
# Build the per-patient objects from the table. The recorded output shows that
# this step issues VariantValidator REST requests for the variants.
individuals = encoder.get_individuals()

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.

In [50]:
# Spot-check: convert the first individual to a GA4GH phenopacket and print it
# as JSON to verify ids, age, sex and phenotypic features by eye.
i1 = individuals[0]
phenopacket1 = i1.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh())
json_string = MessageToJson(phenopacket1)
# print() is intentional here: json_string is plain text, not a rich object.
print(json_string)

{
  "id": "1-II:1",
  "subject": {
    "id": "1-II:1",
    "timeAtLastEncounter": {
      "age": {
        "iso8601duration": "P51Y"
      }
    },
    "sex": "MALE"
  },
  "phenotypicFeatures": [
    {
      "type": {
        "id": "HP:0030053",
        "label": "Stiff skin"
      },
      "onset": {
        "age": {
          "iso8601duration": "P51Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001083",
        "label": "Ectopia Lentis"
      },
      "excluded": true,
      "onset": {
        "age": {
          "iso8601duration": "P51Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0000518",
        "label": "Cataract"
      },
      "excluded": true,
      "onset": {
        "age": {
          "iso8601duration": "P51Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0012385",
        "label": "Camptodactyly"
      },
      "onset": {
        "age": {
          "iso8601duration": "P51Y"
        }
      }
    },
    {
      "t

In [51]:
# Write the phenopackets for the cohort; outdir is relative to the notebook's
# working directory. The recorded output shows that this call again issues
# VariantValidator REST requests.
output_directory = "phenopackets"
encoder.output_phenopackets(outdir=output_directory)

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4710G>T/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_000138.5%3Ac.4691G>C/NM_000138.5?content-type=application%2Fjson
https://rest.