<h1>Noon syndrome 1: Lee at al (2007): phenopackets</h1>
<p>Data imported from <a href="https://pubmed.ncbi.nlm.nih.gov/17661820/">Lee ST, Ki CS, Lee HJ. Mutation analysis of the genes involved in the Ras-mitogen-activated protein kinase (MAPK) pathway in Korean patients with Noonan syndrome. Clin Genet. 2007 Aug;72(2):150-5.PMID: 17661820</a>.</p>

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import display, HTML
import pyphetools
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import CohortValidator
print(f"pyphetools version {pyphetools.__version__}")

pyphetools version 0.8.31


In [2]:
PMID = "PMID:17661820"
title = "Mutation analysis of the genes involved in the Ras-mitogen-activated protein kinase (MAPK) pathway in Korean patients with Noonan syndrome"
citation = Citation(pmid=PMID, title=title)
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-0736-9199", citation=citation)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2023-10-09


In [3]:
df = pd.read_excel('input/Lee2007Noonan1.xlsx')

In [4]:
df.head()

Unnamed: 0,Patient,1,2,3,4,5,6,7
0,Sex,M,F,M,F,F,M,M
1,Age,5,29,4,2,30,6,3
2,PTPN11 mutation,T42A,N308D,N308D,N308D,N308D,N308D,M504V
3,transcript.hgvs,c.124A>G,c.922A>G,c.922A>G,c.922A>G,c.922A>G,c.922A>G,c.1510A>G
4,CHD,"ASD, SVC and IVC anomaly",PS,"ASD, PS, hypoplastic MPA","VSD, PS",PS,"ASD, PS","ASD, mild PS"


In [5]:
# need to convert to column-based format
dft = df.transpose()
dft.columns = dft.iloc[0]
dft.drop(dft.index[0], inplace=True)
dft.head()
dft['patient_id'] = dft.index

In [6]:
column_mapper_d = {}

In [7]:
chd_d = {'ASD': 'Atrial septum defect',
         'SVC': 'Bilateral superior vena cava', # from paper!
             'PS': 'Pulmonic stenosis',
        'hypoplastic MPA': 'Pulmonary artery hypoplasia',
        'VSD': 'Ventricular septal defect'}
chdMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=chd_d)
chdMapper.preview_column(dft['CHD'])
#column_mapper_d['CHD'] = chdMapper

Unnamed: 0,terms
0,HP:0001631 (Atrial septum defect/observed); HP:0033379 (Bilateral superior vena cava/observed)
1,HP:0001642 (Pulmonic stenosis/observed)
2,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed); HP:0004971 (Pulmonary artery hypoplasia/observed)
3,HP:0001629 (Ventricular septal defect/observed); HP:0001642 (Pulmonic stenosis/observed)
4,HP:0001642 (Pulmonic stenosis/observed)
5,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed)
6,HP:0001631 (Atrial septum defect/observed); HP:0001642 (Pulmonic stenosis/observed)


In [8]:
#Webbed neck HP:0000465
webbedNeckMapper = SimpleColumnMapper(hpo_id='HP:0000465',
    hpo_label='Webbed neck',
    observed='Yes',
    excluded='−')
#webbedNeckMapper.preview_column(dft['Webbed neck'])
column_mapper_d['Webbed neck'] = webbedNeckMapper

In [9]:
# Short stature HP:0004322
shortStatureMapper = SimpleColumnMapper(hpo_id='HP:0004322',
    hpo_label='Short stature',
    observed='Yes',
    excluded='−')
#shortStatureMapper.preview_column(dft['Short stature'])
column_mapper_d['Short stature'] = shortStatureMapper

In [10]:
# Chest deformity -- assume pectus excavatum, reported for one patient only in detail
# Pectus excavatum HP:0000767
pectusMapper = SimpleColumnMapper(hpo_id='HP:0000767',
    hpo_label='Pectus excavatum',
    observed='Yes',
    excluded='−')
#pectusMapper.preview_column(dft['Chest deformity'])
column_mapper_d['Chest deformity'] = pectusMapper

In [11]:
# Feeding difficulties HP:0011968
feedingMapper = SimpleColumnMapper(hpo_id='HP:0011968',
    hpo_label='Feeding difficulties',
    observed='Yes',
    excluded='−')
#feedingMapper.preview_column(dft['Feeding problems'])
column_mapper_d['Feeding problems'] = feedingMapper

In [12]:
# Hearing problem
# Hearing impairment HP:0000365
hearingMapper = SimpleColumnMapper(hpo_id='HP:0000365',
    hpo_label='Hearing impairment',
    observed='Yes',
    excluded='−')
#hearingMapper.preview_column(dft['Hearing problem'])
column_mapper_d['Hearing problem'] = hearingMapper

In [13]:
# Delayed development
# Global developmental delay HP:0001263
devMapper = SimpleColumnMapper(hpo_id='HP:0001263',
    hpo_label='Global developmental delay',
    observed='Yes',
    excluded='−')
#devMapper.preview_column(dft['Delayed development'])
column_mapper_d['Delayed development'] = devMapper

In [14]:
# Mental retardation
# Intellectual disability, mild HP:0001256
idMapper =  SimpleColumnMapper(hpo_id='HP:0001256',
    hpo_label='Intellectual disability, mild',
    observed='Mild',
    excluded='−')
#idMapper.preview_column(dft['Mental retardation'])
column_mapper_d['Mental retardation'] = idMapper

In [15]:
# Cryptorchidism HP:0000028
cryptorchidismMapper =  SimpleColumnMapper(hpo_id='HP:0000028',
    hpo_label='Cryptorchidism',
    observed='Yes',
    excluded='−')
#cryptorchidismMapper.preview_column(dft['Cryptorchidism'])
column_mapper_d['Cryptorchidism'] = cryptorchidismMapper

In [16]:
# Cubitus valgus HP:0002967
cvalMapper =  SimpleColumnMapper(hpo_id='HP:0002967',
    hpo_label='Cubitus valgus',
    observed='Yes',
    excluded='−')
#cvalMapper.preview_column(dft['Cubitus valgus'])
column_mapper_d['Cubitus valgus'] = cvalMapper

In [17]:
# Patient 1 had a small ectopic kidney
other_d = {'Splenomegaly': 'Splenomegaly',
         'Renal': 'Ectopic kidney', # from paper!
             }
otherMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_d)
#otherMapper.preview_column(dft['Others'])
column_mapper_d['Others'] = otherMapper


<h3>Variants</h3>
<p>By inspection in ClinVar, the three variants are: NM_002834.5(PTPN11):c.124A>G (p.Thr42Ala), NM_002834.5(PTPN11):c.922A>G (p.Asn308Asp), and  NM_002834.5(PTPN11):c.1510A>G (p.Met504Val) </p>

In [18]:
ptpn11_transcript='NM_002834.5'
vvalidator = VariantValidator(genome_build="hg38", transcript=ptpn11_transcript)
var_d = {}
for v in dft['transcript.hgvs'].unique():
    var = vvalidator.encode_hgvs(v)
    print(f"{v} - {var}")
    var_d[v] = var
varMapper = VariantColumnMapper(variant_d=var_d, 
                                variant_column_name='transcript.hgvs', 
                                default_genotype="heterozygous")

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.124A>G/NM_002834.5?content-type=application%2Fjson
c.124A>G - chr12:112446385A>G
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.922A>G/NM_002834.5?content-type=application%2Fjson
c.922A>G - chr12:112477719A>G
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_002834.5%3Ac.1510A>G/NM_002834.5?content-type=application%2Fjson
c.1510A>G - chr12:112489086A>G


In [19]:
ageMapper = AgeColumnMapper.by_year('Age')
#ageMapper.preview_column(dft['Age'])
sexMapper = SexColumnMapper(male_symbol='M', female_symbol='F', column_name='Sex')
#sexMapper.preview_column(dft['Sex'])

In [20]:
encoder = CohortEncoder(df=dft, hpo_cr=hpo_cr, column_mapper_d=column_mapper_d, 
                        individual_column_name="patient_id", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        metadata=metadata,
                        variant_mapper=varMapper,
                        pmid=PMID)
noonan = Disease(disease_id="OMIM:163950", disease_label="Noonan syndrome 1")
encoder.set_disease(noonan)

In [21]:
individuals = encoder.get_individuals()
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1, allelic_requirement=AllelicRequirement.MONO_ALLELIC)
qc = QcVisualizer(ontology=hpo_ontology, cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

TypeError: Individual.__init__() got an unexpected keyword argument 'pmid'

In [None]:
individuals = cvalidator.get_error_free_individual_list()
table = PhenopacketTable(individual_list=individuals, metadata=metadata)
display(HTML(table.to_html()))

In [None]:
Individual.output_individuals_as_phenopackets(individual_list=individuals,
                                              metadata=metadata)