<h1>Noon syndrome 1 phenopackets</h1>
<p>Lee ST, Ki CS, Lee HJ. Mutation analysis of the genes involved in the Ras-mitogen-activated protein kinase (MAPK) pathway in Korean patients with Noonan syndrome. Clin Genet. 2007 Aug;72(2):150-5.PMID: 17661820.</p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
import os
import sys
from pyphetools.creation import *

In [2]:
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
pmid = "PMID:17661820"
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199")
metadata.default_versions_with_hpo(version=hpo_version)

In [3]:
df = pd.read_excel('Lee2007Noonan1.xlsx')

In [4]:
df

Unnamed: 0,Patient,1,2,3,4,5,6,7
0,Sex,M,F,M,F,F,M,M
1,Age,5,29,4,2,30,6,3
2,PTPN11 mutation,T42A,N308D,N308D,N308D,N308D,N308D,M504V
3,transcript.hgvs,c.124A>G,c.922A>G,c.922A>G,c.922A>G,c.922A>G,c.922A>G,c.1510A>G
4,CHD,"ASD, SVC and IVC anomaly",PS,"ASD, PS, hypoplastic MPA","VSD, PS",PS,"ASD, PS","ASD, mild PS"
5,Facial.features,Yes,Equivocal,Yes,Yes,Yes,Yes,Yes
6,Webbed neck,Yes,Yes,-,-,Yes,Yes,Yes
7,Short stature,Yes,Yes,Yes,Yes,Yes,No,Yes
8,Chest deformity,Yes,Yes,Yes,-,-,Yes,Yes
9,Feeding problems,-,-,-,Yes,-,-,-


In [5]:
# need to convert to column-based format
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
dft.head()
dft['patient_id'] = dft.index

In [6]:
column_mapper_d = defaultdict(ColumnMapper)

In [7]:
chd_d = {'ASD': 'Atrial septum defect',
         'SVC': 'Bilateral superior vena cava', # from paper!
             'PS': 'Pulmonic stenosis',
        'hypoplastic MPA': 'Pulmonary artery hypoplasia',
        'VSD': 'Ventricular septal defect'}
chdMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=chd_d)
chdMapper.preview_column(dft['CHD'])
#column_mapper_d['CHD'] = chdMapper

Unnamed: 0,terms
0,HP:0001631 (Atrial septum defect/observed); HP:0033379 (Bilateral superior vena cava/observed)
1,HP:0001642 (Pulmonic stenosis/observed)
2,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed); HP:0004971 (Pulmonary artery hypoplasia/observed)
3,HP:0001642 (Pulmonic stenosis/observed); HP:0001629 (Ventricular septal defect/observed)
4,HP:0001642 (Pulmonic stenosis/observed)
5,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed)
6,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed)


In [8]:
#Webbed neck HP:0000465
webbedNeckMapper = SimpleColumnMapper(hpo_id='HP:0000465',
    hpo_label='Webbed neck',
    observed='Yes',
    excluded='−')
#webbedNeckMapper.preview_column(dft['Webbed neck'])
column_mapper_d['Webbed neck'] = webbedNeckMapper

In [9]:
# Short stature HP:0004322
shortStatureMapper = SimpleColumnMapper(hpo_id='HP:0004322',
    hpo_label='Short stature',
    observed='Yes',
    excluded='−')
#shortStatureMapper.preview_column(dft['Short stature'])
column_mapper_d['Short stature'] = shortStatureMapper

In [10]:
# Chest deformity -- assume pectus excavatum, reported for one patient only in detail
# Pectus excavatum HP:0000767
pectusMapper = SimpleColumnMapper(hpo_id='HP:0000767',
    hpo_label='Pectus excavatum',
    observed='Yes',
    excluded='−')
#pectusMapper.preview_column(dft['Chest deformity'])
column_mapper_d['Chest deformity'] = pectusMapper

In [11]:
# Feeding difficulties HP:0011968
feedingMapper = SimpleColumnMapper(hpo_id='HP:0011968',
    hpo_label='Feeding difficulties',
    observed='Yes',
    excluded='−')
#feedingMapper.preview_column(dft['Feeding problems'])
column_mapper_d['Feeding problems'] = feedingMapper

In [12]:
# Hearing problem
# Hearing impairment HP:0000365
hearingMapper = SimpleColumnMapper(hpo_id='HP:0000365',
    hpo_label='Hearing impairment',
    observed='Yes',
    excluded='−')
#hearingMapper.preview_column(dft['Hearing problem'])
column_mapper_d['Hearing problem'] = hearingMapper

In [13]:
# Delayed development
# Global developmental delay HP:0001263
devMapper = SimpleColumnMapper(hpo_id='HP:0001263',
    hpo_label='Global developmental delay',
    observed='Yes',
    excluded='−')
#devMapper.preview_column(dft['Delayed development'])
column_mapper_d['Delayed development'] = devMapper

In [14]:
# Mental retardation
# Intellectual disability, mild HP:0001256
idMapper =  SimpleColumnMapper(hpo_id='HP:0001256',
    hpo_label='Intellectual disability, mild',
    observed='Mild',
    excluded='−')
#idMapper.preview_column(dft['Mental retardation'])
column_mapper_d['Mental retardation'] = idMapper

In [15]:
# Cryptorchidism HP:0000028
cryptorchidismMapper =  SimpleColumnMapper(hpo_id='HP:0000028',
    hpo_label='Cryptorchidism',
    observed='Yes',
    excluded='−')
#cryptorchidismMapper.preview_column(dft['Cryptorchidism'])
column_mapper_d['Cryptorchidism'] = cryptorchidismMapper

In [16]:
# Cubitus valgus HP:0002967
cvalMapper =  SimpleColumnMapper(hpo_id='HP:0002967',
    hpo_label='Cubitus valgus',
    observed='Yes',
    excluded='−')
#cvalMapper.preview_column(dft['Cubitus valgus'])
column_mapper_d['Cubitus valgus'] = cvalMapper

In [1]:
# Patient 1 had a small ectopic kidney
other_d = {'Splenomegaly': 'Splenomegaly',
         'Renal': 'Ectopic kidney ', # from paper!
             }
otherMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_d)
#otherMapper.preview_column(dft['Others'])
column_mapper_d['Others'] = otherMapper


NameError: name 'OptionColumnMapper' is not defined

<h3>Variants</h3>
<p>By inspection in ClinVar, the three variants are: NM_002834.5(PTPN11):c.124A>G (p.Thr42Ala), NM_002834.5(PTPN11):c.922A>G (p.Asn308Asp), and  NM_002834.5(PTPN11):c.1510A>G (p.Met504Val) </p>

In [18]:
genome = 'hg38'
default_genotype = 'heterozygous'
transcript='NM_002834.5'
varMapper = VariantColumnMapper(assembly=genome,column_name='transcript.hgvs', 
                                transcript=transcript, genotype=default_genotype)

In [19]:
ageMapper = AgeColumnMapper.by_year('Age')
ageMapper.preview_column(dft['Age'])

Unnamed: 0,original column contents,age
0,5,P5Y
1,29,P29Y
2,4,P4Y
3,2,P2Y
4,30,P30Y
5,6,P6Y
6,3,P3Y


In [20]:
sexMapper = SexColumnMapper(male_symbol='M', female_symbol='F', column_name='Sex')
sexMapper.preview_column(dft['Sex'])

Unnamed: 0,original column contents,sex
0,M,MALE
1,F,FEMALE
2,M,MALE
3,F,FEMALE
4,F,FEMALE
5,M,MALE
6,M,MALE


In [21]:
encoder = CohortEncoder(df=dft, hpo_cr=hpo_cr, column_mapper_d=column_mapper_d, 
                        individual_column_name="patient_id", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        metadata=metadata,
                        variant_mapper=varMapper,
                        pmid=pmid)

In [22]:
dft.columns

Index(['Sex', 'Age', 'PTPN11 mutation', 'transcript.hgvs', 'CHD',
       'Facial.features', 'Webbed neck', 'Short stature', 'Chest deformity',
       'Feeding problems', 'Easy bruising', 'Hearing problem',
       'Delayed development', 'Mental retardation', 'Cryptorchidism',
       'Cubitus valgus', 'Others', 'patient_id'],
      dtype='object', name='Patient')

In [23]:
individuals = encoder.get_individuals()

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.124A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.1510A>G/NM_002834.5?content-type=application%2Fjson


In [24]:
i1 = individuals[0]
phenopacket1 = i1.to_ga4gh_phenopacket(metadata=metadata.to_ga4gh())
json_string = MessageToJson(phenopacket1)
print(json_string)

{
  "id": "1",
  "subject": {
    "id": "1",
    "timeAtLastEncounter": {
      "age": {
        "iso8601duration": "P5Y"
      }
    },
    "sex": "MALE"
  },
  "phenotypicFeatures": [
    {
      "type": {
        "id": "HP:0000465",
        "label": "Webbed neck"
      },
      "onset": {
        "age": {
          "iso8601duration": "P5Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0004322",
        "label": "Short stature"
      },
      "onset": {
        "age": {
          "iso8601duration": "P5Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0000767",
        "label": "Pectus excavatum"
      },
      "onset": {
        "age": {
          "iso8601duration": "P5Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001263",
        "label": "Global developmental delay"
      },
      "onset": {
        "age": {
          "iso8601duration": "P5Y"
        }
      }
    },
    {
      "type": {
        "id": "HP:0001256",
      

In [25]:
output_directory = "phenopackets_lee_2007"
encoder.output_phenopackets(outdir=output_directory)

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.124A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.1510A>G/NM_002834.5?content-type=application%2Fjson
Wrote 7 phenopacket