# SRF1

The data in this notebook were dervied from [Bogaert E, et al. (2023) SRSF1 haploinsufficiency is responsible for a syndromic developmental disorder associated with intellectual disability. Am J Hum Genet; 110(5):790-808. PMID: 37071997]

Individual 6 and 17 were removed from the cohort because no SRF1 variant could be indentified in the original study.

In [1]:
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from IPython.display import display, HTML
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.15


In [2]:
PMID = "PMID:37071997"
title = "SRSF1 haploinsufficiency is responsible for a syndromic developmental disorder associated with intellectual disability"
cite = Citation(pmid=PMID, title=title)
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()
metadata = MetaData(created_by="ORCID:0000-0002-5648-2155", citation=cite)
metadata.default_versions_with_hpo(version=hpo_version)
print(f"HPO version {hpo_version}")

HPO version 2023-10-09


In [3]:
df = pd.read_csv("input/suppdata.csv")
dft = df.transpose()
dft.columns = dft.iloc[0] 
dft.drop(dft.index[0], inplace=True)
dft['individual_id'] = dft.index 
dft.tail()

Subject,Sex,SRSF1 Genomic coordinates [ hg19 ),Transcript ( NM_006924.5 ),Protein,Mode of inheritance,Pregnancy complications,Gestational weeks( weeks + days ),Birth weight g ( SD ),Birth length cm ( SD ),Birth OFC cm ( SD ),...,Urogenital / kidney malformations,Other,Scoliosis,Pectus deformity,Other physical,Facial features,Cutaneous abnormalities,Additional clinical features,Additional molecular features,individual_id
Individual 12,Female,NC_000017.10 : g .56083875C > T,c.208G > A,p . ( Ala70Thr ),De novo,No,41 + 2,3700 ( +0.6 ),34 ( -0.5 ),34 ( -0.5 ),...,No,,No,No,,Thin upper lip; Hypertelorism; Short chin,No,Sleeping problems,MBD6 variant NM_052897.3 : c . 2337dup T,Individual 12
Individual 13,Female,NC_000017.10 : g .56084428G > A,c.71C > T,p . ( Pro24Leu ),De novo,No,39 + 2,3997 ( +1.7 ),50.8 ( +0.9 ),ND,...,No,,No,No,,Full cheeks; Epicanthal folds; Telecanthus; Thin upper lip,No,Sparse hair ;2 hemangiomas,,Individual 13
Individual 14,Male,NC_000017.10 : g .56083852A > C,c.231T > G,"p . ( Tyr77 "" )",De novo,Toxemia C - section,41,ND,ND,ND,...,No,"Hernia , diastasis",Yes,Pectus excavatum,"Marfan oid habitus with dolichostenomelia ,arachnodactyly ,flat feet andelbow flessum EquinovarusPostaxialhexadactyly on left foot Onydhodystrophy",Oval face; Thick lips; Pointed chin; Long nose;and thin nose; Microretrognathism; Downslanted palpebral fissures,,Moebius syndrome,,Individual 14
Individual 15,Male,NC_000017.10 : 56083832A > C,c.251T > G,p . ( Leu84Arg ),De novo,"Screening test positive : amniocentesis with karyotype 46 , XY",39,2980 ( -1 ),50 ( 0 ),35 ( +0.2 ),...,Balanopreputial hypospadias,-,Yes,Pectus carinatum,Marfanoid habitus with arachnodactyly Genu valgum Metatarsus varus,Triangular face; with facial asymmetry; Upslanted palpebral fissures; Long nose; and tubular nose; Long philtrum; and smooth philtrum; Retrognathia,No,Bifid uvula,,Individual 15
Individual 16,Female,NC 000017.10 : g . 56084369C > T,c.130G > A,p . ( Asp44Asn ),De novo,No,38 + 6,3065 ( +0.1 ),51 ( +1.3 ),34 ( +0.2 ),...,No,,No,No,,Downslanted palpebral fissures; Depressed nasal bridge; Midly downturned corners of the mouth,No,No,,Individual 16


In [4]:
generator = SimpleColumnMapperGenerator(df=dft,
                                  observed='Yes',
                                  excluded='No',
                                  hpo_cr=hpo_cr)
column_mapper_d = generator.try_mapping_columns()
display(HTML(generator.to_html()))

Result,Columns
Mapped,Failure to thrive; Hypotonia; Hearing loss; Seizures; Scoliosis; Pectus deformity
Unmapped,Sex; SRSF1 Genomic coordinates [ hg19 ); Transcript ( NM_006924.5 ); Protein; Mode of inheritance; Pregnancy complications; Gestational weeks( weeks + days ); Birth weight g ( SD ); Birth length cm ( SD ); Birth OFC cm ( SD ); Neonatal complications; Age at last examination ( Year + month ); Height at last visit cm ( SD ); Weight at last visit kg ( SD ); BMI at last visit kg / m2 ( SD ); OFC at last visit cm ( SD ); Truncal overweight; Intellectual Disability / Developmental Delay; Level of ID / DD; First words; Speech at last examination; Sitting position age; Walking age; Autistic features ( please describe ); Outbursts of anger; Other ASD features; Vision problems; Brain Structural anomalies from MRI; Cardiac malformations; Urogenital / kidney malformations; Other; Other physical; Facial features; Cutaneous abnormalities; Additional clinical features; Additional molecular features; individual_id


In [34]:
output = OptionColumnMapper.autoformat(df=dft, concept_recognizer=hpo_cr, delimiter=";")
print(output)

srsf1_genomic_coordinates_[_hg19_)_d = {'NC 000017.10 : g .56083708_56083709del': 'PLACEHOLDER',
 'NC_000017.10 : g .56083236C > T': 'PLACEHOLDER',
 'NC_000017.10 : g .56083237C > T': 'PLACEHOLDER',
 'NC_000017.10 : 8 . 56082935dup': 'PLACEHOLDER',
 'NC 000017.10 : g .56084417G > A': 'PLACEHOLDER',
 'NC_000017.10 : g .56083166T > C': 'PLACEHOLDER',
 'NC_000017.10 : g .56084380C > A': 'PLACEHOLDER',
 'NC 000017.10 : 56084402C > A': 'PLACEHOLDER',
 'NC_000017.10 : g .56082914del': 'PLACEHOLDER',
 'NC_000017.10 : g .56083875C > T': 'PLACEHOLDER',
 'NC_000017.10 : g .56084428G > A': 'PLACEHOLDER',
 'NC_000017.10 : g .56083852A > C': 'PLACEHOLDER',
 'NC_000017.10 : 56083832A > C': 'PLACEHOLDER',
 'NC 000017.10 : g . 56084369C > T': 'PLACEHOLDER'}
srsf1_genomic_coordinates_[_hg19_)Mapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=srsf1_genomic_coordinates_[_hg19_)_d)
srsf1_genomic_coordinates_[_hg19_)Mapper.preview_column(df['SRSF1 Genomic coordinates [ hg19 )'])
column_mapper_

In [6]:
complications_d = {'Oxygen requirement because of pulmonary hypoplasia': 'Pulmonary hypoplasia',
 #'No': 'PLACEHOLDER',
 'Congenital torticoli': 'Congenital muscular torticollis',
 #'RCIU': 'PLACEHOLDER',
 'hypocalcemia': 'Hypocalcemia',
 'Feeding difficulties Icterus': ['Feeding difficulties', 'Jaundice'],
 'Congenital heart defect': 'Abnormal heart morphology',
 'Transient tachypnea Increased muscle tone': ['Tachypnea' 'Hypertonia'],
 'Icterus': 'Jaundice',
 'Neonatal hypotonia Sucking difficulties': ['Neonatal hypotonia','Poor suck'],
 'Transient tachypnea of the newborn': 'Tachypnea'}
complicationsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=complications_d)
complicationsMapper.preview_column(dft['Neonatal complications'])
column_mapper_d['Neonatal complications'] = complicationsMapper

In [7]:
pregnancy_complications_d = {'IUGR': 'Intrauterine growth retardation',
 'Poor active fetal movements': 'Decreased fetal movement',
 #'C - section': 'PLACEHOLDER',
 #'No': 'PLACEHOLDER',
 'Ovarian stimulation C - section for breech positioning': 'Breech presentation',
 'Premature rupture of membranes at 31w + 2d': 'Premature rupture of membranes',
 'C - section for fetal distress': 'Fetal distress',
 #'No follow up': 'PLACEHOLDER',
  #'Toxemia C - section': 'Toxemia of pregnancy', #I don't know why this doesn't work!
 'Screening test positive : amniocentesis with karyotype 46': 'Female external genitalia in individual with 46,XY karyotype',
 'Increased nuchal tranlucency': 'Increased nuchal translucency'}
pregnancy_complicationsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=pregnancy_complications_d)
pregnancy_complicationsMapper.preview_column(dft['Pregnancy complications'])
column_mapper_d['Pregnancy complications'] = pregnancy_complicationsMapper

In [8]:
gestational_weeks_d = {'36': 'Premature birth',
 '31 + 2': 'Premature birth'}
gestational_weeksMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=gestational_weeks_d)
gestational_weeksMapper.preview_column(dft['Gestational weeks( weeks + days )'])
column_mapper_d['Gestational weeks( weeks + days )'] = gestational_weeksMapper

In [9]:
birth_weight_g_d = {'1700 ( -2.6 )': 'Small for gestational age',
 '1765 ( +0.6 )': 'Small for gestational age',
                   }
birth_weight_g_Mapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=birth_weight_g_d)
birth_weight_g_Mapper.preview_column(dft['Birth weight g ( SD )'])
column_mapper_d['Birth weight g ( SD )'] = birth_weight_g_Mapper


In [10]:
birth_length_cm_d = {'39 ( -3.6 )': 'Birth length less than 3rd percentile',
 '40.5 ( -0.2 )': 'Birth length less than 3rd percentile',
 '45 ( -1.8 )': 'Birth length less than 3rd percentile',
 '34 ( -0.5 )': 'Birth length less than 3rd percentile',
}
birth_length_cm_Mapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=birth_length_cm_d)
birth_length_cm_Mapper.preview_column(dft['Birth length cm ( SD )'])
column_mapper_d['Birth length cm ( SD )'] = birth_length_cm_Mapper

In [11]:
failure_to_thrive_d = {'Yes': 'Failure to thrive',
 'Yes Chewing diffuculty': ['Failure to thrive', 'Chewing difficulty'],
 'Infant reflux': 'Gastroesophageal reflux'}
excluded = {'No': 'Failure to thrive', } 
failure_to_thriveMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=failure_to_thrive_d, excluded_d=excluded)
failure_to_thriveMapper.preview_column(dft['Failure to thrive'])
column_mapper_d['Failure to thrive'] = failure_to_thriveMapper

In [12]:
truncal_overweight_d = {'Yes': 'Childhood-onset truncal obesity',
 'Trunk adiposity': 'Childhood-onset truncal obesity',
 'Mildly overweight': 'Overweight'}
excluded = {'No': 'Childhood-onset truncal obesity',}
truncal_overweightMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=truncal_overweight_d, excluded_d=excluded)
truncal_overweightMapper.preview_column(dft['Truncal overweight'])
column_mapper_d['Truncal overweight'] = truncal_overweightMapper

In [13]:
intellectual_disability_developmental_delay_d = {'Yes': ['Intellectual disability', 'Neurodevelopmental delay'],
 'Learning difficulties': 'Neurodevelopmental abnormality'}
intellectual_disability_developmental_delayMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=intellectual_disability_developmental_delay_d)
intellectual_disability_developmental_delayMapper.preview_column(dft['Intellectual Disability / Developmental Delay'])
column_mapper_d['Intellectual Disability / Developmental Delay'] = intellectual_disability_developmental_delayMapper

In [14]:
level_of_id_dd_d = {'Mild to moderate': 'Intellectual disability, moderate',
 'Mild to moderale': 'Intellectual disability, moderate',
 'IQ : 76 Verbal Comprehension Index : 92 Perceptual Reasoning Index : 84 Working memory Index : 66 Processing Speed Index : 75': 'Intellectual disability, borderline',
 'Mild': 'Intellectual disability, mild',
 'Severe ID Age Equivalent Scores at 5 years and 2months : developmental age : 6 months receptive language : 9 months expressive language : 5 months fine motor : 1 year 7 months interpersonal relationship : 1 year 6 months': 'Intellectual disability, severe',
 'Severe': 'Intellectual disability, severe',
 'Borderline level': 'Intellectual disability, borderline',
 'Moderate': 'Intellectual disability, moderate',
 'Cognitive evaluation ( Bayley II test ) at 3 years and 4 months : developmental age estimated at29 months old': 'Intellectual disability, mild'}
level_of_id_ddMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=level_of_id_dd_d)
level_of_id_ddMapper.preview_column(dft['Level of ID / DD'])
column_mapper_d['Level of ID / DD'] = level_of_id_ddMapper

In [15]:
first_words_d = {
 #'18 months': 'PLACEHOLDER',
 'After 18 months': 'Delayed speech and language development',
 #'15 years': 'Delayed speech and language development', WNL
 '24 months': 'Delayed speech and language development',
 '> 24 months': 'Delayed speech and language development'}
first_wordsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=first_words_d)
first_wordsMapper.preview_column(dft['First words'])
column_mapper_d['First words'] = first_wordsMapper

In [16]:
speech_at_last_examination_d = {'Short sentences': 'Delayed speech and language development',
 '50-100 words - poor articulation': 'Speech articulation difficulties',
 'Speech delay': 'Delayed speech and language development',
 'Delayed ( simple sentences )': 'Delayed speech and language development',
 'No speech': 'Absent speech',
 'mild articulation issue': 'Speech articulation difficulties',
 '2 words': 'Delayed speech and language development',
 'Simplified sentences': 'Delayed speech and language development',
 'Dealyed': 'Delayed speech and language development',
 'few words ( one syllable )': 'Delayed speech and language development',
 'Delayed - 30 single words at 2 years 2 months': 'Delayed speech and language development',
 'Poor ( 10-15 spontaneous words )': 'Delayed speech and language development'}
speech_at_last_examinationMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=speech_at_last_examination_d)
speech_at_last_examinationMapper.preview_column(dft['Speech at last examination'])
column_mapper_d['Speech at last examination'] = speech_at_last_examinationMapper

In [17]:
sitting_position_age_d = {
 '12 months': 'Delayed ability to sit',
 '10 months': 'Delayed ability to sit',
 '10 years': 'Delayed ability to sit',
 '11 months': 'Delayed gross motor development'}
sitting_position_ageMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=sitting_position_age_d)
sitting_position_ageMapper.preview_column(dft['Sitting position age'])
column_mapper_d['Sitting position age'] = sitting_position_ageMapper

In [18]:
# Threshold of 18 months
walking_age_d = {'4 years': 'Delayed ability to walk',
 '29 months': 'Delayed ability to walk',
 'Not acquired at last visit Up unaided : 21monts': 'Delayed ability to walk',
 '19 months': 'Delayed ability to walk',
 '23 months': 'Delayed ability to walk',
 '2 years 4 months': 'Delayed ability to walk',
 '2years 6 months': 'Delayed ability to walk',
 '11 years': 'Delayed ability to walk',
 '20 months': 'Delayed ability to walk',
 '1 year 11 months': 'Delayed ability to walk',
 '18-19 months': 'Delayed ability to walk',
 '24 months': 'Delayed ability to walk'}
walking_ageMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=walking_age_d)
walking_ageMapper.preview_column(dft['Walking age'])
column_mapper_d['Walking age'] = walking_ageMapper

In [19]:
hypotonia_d = {'Yes': 'Hypotonia',
 'Yes during infancy': 'Infantile muscular hypotonia',
 'Buccal hypotonia': 'Facial hypotonia',
 }
excluded = { 'No': 'Hypotonia',}
hypotoniaMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=hypotonia_d, excluded_d=excluded)
hypotoniaMapper.preview_column(dft['Hypotonia'])
column_mapper_d['Hypotonia'] = hypotoniaMapper

In [20]:
autistic_features_d = { 
 'Mild at 4 years with difficulties to adaptation to new situations': 'Impaired social interactions',
 'frustration intolerance': 'Low frustration tolerance',
 'rare heteroagressivity': 'Aggression towards others',
 #'resolved at 6 years': 'PLACEHOLDER',
 'Repetitive motions': 'Abnormal repetitive mannerisms',
 'very poor social interaction': 'Impaired social interactions',
 'overreaction to frustration': 'Low frustration tolerance',
 'Autistic featues but no formal ASD diagnosis Sensory issues better with age': ['Autistic behavior', 'Sensory behavioral abnormality'],
 'Flapping of the hands Feeling overwhelmed in social situations\nFeeling overwhelmed in social situations .': ['Recurrent hand flapping','Abnormal social behavior'], 
 'Social difficulties Obsessions Stereotypies': ['Insistence on sameness', 'Abnormal repetitive mannerisms'],
 'Stereotypies': 'Abnormal repetitive mannerisms',
 'Poor eye contact': 'Reduced eye contact',
 'No response to her name': 'Abnormal response to social norms',
 'Afraid of loud noises': 'Phonophobia',
 'Lines things up in a row': 'Atypical sorting',
 'Difficulties to adaptation': 'Impaired social interactions',
 'Autistic features': 'Autistic behavior'}
excluded = {'No': 'Autistic behavior'}
autistic_features_Mapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=autistic_features_d, excluded_d=excluded)
autistic_features_Mapper.preview_column(dft['Autistic features ( please describe )'])
column_mapper_d['Autistic features ( please describe )'] = autistic_features_Mapper

In [21]:
outbursts_of_anger_d = {'Yes': 'Abnormal temper tantrums',
 'Few': 'Abnormal temper tantrums',
 'until 9months': 'Abnormal temper tantrums',
 
 'less frequent with age': 'Abnormal temper tantrums',
 'Yes with frustration or specific circumstances': 'Abnormal temper tantrums'}
excluded = {'No': 'Abnormal temper tantrums'}
outbursts_of_angerMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=outbursts_of_anger_d, excluded_d=excluded)
outbursts_of_angerMapper.preview_column(dft['Outbursts of anger'])
column_mapper_d['Outbursts of anger'] = outbursts_of_angerMapper

In [22]:
other_autistic_features_d = { #'nan': 'PLACEHOLDER',
 'Stereotypy': 'Abnormal repetitive mannerisms',
 'Happy': 'Happy demeanor',
 'smiling': 'Happy demeanor',
 'ungoing': 'Social disinhibition',
 #'No': 'PLACEHOLDER',
 'Outbursts of crying or laughter': 'Abnormal affect',
 'ADHD diagnosed at 5 years': 'Attention deficit hyperactivity disorder',
# 'treated with medication': 'PLACEHOLDER',
 'Hyperactivity Tiptoe walking': 'Tip-toe gait',
 # 'Described as social with good eye contact': 'PLACEHOLDER',
    
  #'Intentional tremor': 'Tremor', #THIS IS MY PROBLEM HERE
    
 'Generalized anxiety disorder with phobic obsessive features': ['Anxiety', 'Phobia']}
other_autistic_featuresMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_autistic_features_d)
other_autistic_featuresMapper.preview_column(dft['Other ASD features'])
column_mapper_d['Other ASD features'] = other_autistic_featuresMapper

In [23]:
hearing_loss_d = { 
 'Bilateral low frequency hearing loss': 'Low-frequency hearing loss'}
excluded = {'No': 'Hearing impariment',}
hearing_lossMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=hearing_loss_d)
hearing_lossMapper.preview_column(dft['Hearing loss'])
column_mapper_d['Hearing loss'] = hearing_lossMapper

In [24]:
vision_problems_d = { 'Strabismus': 'Strabismus',
 'myopia': 'Myopia',
 'Myopia': 'Myopia',
 'Astigmatism and Strabismus': ['Strabismus', 'Astigmatism'],
 'Astigmatism Hypermetropia': ['Hypermetropia', 'Astigmatism'],
 'Hypermetropia': 'Hypermetropia',
 'Nystagmus Strabismus': ['Strabismus', 'Nystagmus'],
 'Severe unilateral myopia': 'Myopia'}
excluded = {'No': 'Abnormality of vision',}
vision_problemsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=vision_problems_d, excluded_d=excluded)
vision_problemsMapper.preview_column(dft['Vision problems'])
column_mapper_d['Vision problems'] = vision_problemsMapper

In [25]:
seizures_d = { 
 'EEG : some anomalies during the process of falling asleep': 'EEG abnormality',
 'Febrile seizure ( one time in in fancy )': 'Febrile seizure (within the age range of 3 months to 6 years)',
 'Myoclonus with fever': 'Myoclonus',
 'Staring spells with deviant gaze': 'Staring gaze'}
excluded = {'No': 'Seizure',
            'No seizure': 'Seizure',
            'No seizure Normal EEG': 'Seizure',
            'No seizure,Normal EEG': 'Seizure',
            'Normal EEG': 'Seizure',
            'No clinical seizure reported': 'Seizure',
            'not associated with clinical signs': 'Seizure',}
seizuresMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=seizures_d, excluded_d=excluded)
seizuresMapper.preview_column(dft['Seizures'])
column_mapper_d['Seizures'] = seizuresMapper

In [26]:
brain_structural_anomalies_from_mri_d = {'Global widening of the ventricular system': 'Ventriculomegaly',
 'cortical subcortical atrophy': 'Subcortical cerebral atrophy',
 'low amount of myelin': 'CNS hypomyelination',
'Myelinisation delay': 'Delayed CNS myelination',
 #'Normal MRI': 'PLACEHOLDER',
 'Thin splenium of the corpus callosum Leptomeningeal cyst of the left temporal lobe': 'Thin corpus callosum',
 'Microcephaly Small frontal lobes with abnormal white matter intensity Simplification of gyral pattern Delayed subcortical white matter myelination Small brain stem Atrophic lateral rectus muscles': ['Microcephaly', 'Hypoplasia of the frontal lobes', 'Hyperintensity of cerebral white matter on MRI', 'Simplified gyral pattern', 'CNS demyelination', 'Aplasia/Hypoplasia of the brainstem', 'Lateral rectus muscle weakness'],
 'Old bilateral caudothalamic groove (germinal matrix)hemorrhages and few punctate foci of old hemorrhage in the cerebellum. Few scattered foci of periventricular and subcortical T2 hyper intensity and Nonspecific but may be sequelae of prior injury': ['Grade I preterm intraventricular hemorrhage', 'Punctate periventricular T2 hyperintense foci'],
 'Midline pituitary cyst which may represent pars intermedia cyst or Rathes cleft cyst Otherwise normal': 'Pituitary gland cyst'
                                        }
brain_structural_anomalies_from_mriMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=brain_structural_anomalies_from_mri_d)
brain_structural_anomalies_from_mriMapper.preview_column(dft['Brain Structural anomalies from MRI'])
column_mapper_d['Brain Structural anomalies from MRI'] = brain_structural_anomalies_from_mriMapper

In [27]:
cardiac_malformations_d = {'ASD Pericardial effusion': 'Pericardial effusion',
 'Small ovale foramen': 'Patent foramen ovale',
# 'No': 'PLACEHOLDER',
 'Tricupsid valve leakage': 'Tricuspid valve regurgitation',
 'Ventricular septal defect ( spontaneous closure ) Right aortic arch Aberrant subclavian artery with subclavian steal': ['Ventricular septal defect', 'Right aortic arch','Abnormal subclavian artery morphology'],
 'Minimal aortic regurgitation Mitral regurgitation': 'Aortic regurgitation',
 'Myxoid dystrophy Cardiomegaly Mitral insufficiency': 'Mitral regurgitation'}
cardiac_malformationsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=cardiac_malformations_d)
cardiac_malformationsMapper.preview_column(dft['Cardiac malformations'])
column_mapper_d['Cardiac malformations'] = cardiac_malformationsMapper

In [28]:
urogenital_kidney_malformations_d = { #'No': 'PLACEHOLDER',
 'Right hydrocele': 'Hydrocele testis',
 'Cryptorchidism': 'Cryptorchidism',
 'hypospadia': 'Hypospadias',
 'Bilateral cryptorchidism': 'Bilateral cryptorchidism',
 'Yes': 'Abnormality of the genitourinary system',
 'Balanopreputial hypospadias': 'Hypospadias'}
urogenital_kidney_malformationsMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=urogenital_kidney_malformations_d)
urogenital_kidney_malformationsMapper.preview_column(dft['Urogenital / kidney malformations'])
column_mapper_d['Urogenital / kidney malformations'] = urogenital_kidney_malformationsMapper

In [29]:
ther_abdominal_d = {
 'Hernia': 'Hernia',
 'diastasis': 'Diastasis recti'}
other_abdominalMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_abdominal_d)
other_abdominalMapper.preview_column(dft['Other Abdominal'])
column_mapper_d['Other Abdominal'] = other_abdominalMapper

NameError: name 'other_abdominal_d' is not defined

In [30]:
pectus_deformity_d = {#'No': 'PLACEHOLDER',
 'Pectus excavatum': 'Pectus excavatum',
 'Pectus carinatum': 'Pectus carinatum',
}
excluded = {'No': 'Abnormal sternum morphology'}
pectus_deformityMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=pectus_deformity_d, excluded_d=excluded)
pectus_deformityMapper.preview_column(dft['Pectus deformity'])
column_mapper_d['Pectus deformity'] = pectus_deformityMapper

In [32]:
other_physical_d = {'Genu valum': 'Genu valgum',
 'Marfanoid habitus with arachnodactyly and elbow flessum': ['Disproportionate tall stature', 'Arachnodactyly'],
 'Brachymetatarsi a': 'Short metatarsal',
 'fallen arches': 'Pes planus',
 'L1 vertebral hypoplasia, with associated kyphosis': ['Vertebral hypoplasia','Kyphosis'],
 'Asymmetric chest': 'Asymmetry of the thorax',
 'Marfan oid habitus with dolichostenomelia': 'Disproportionate tall stature',
 'arachnodactyly': 'Arachnodactyly',
 'flat feet, and elbow flessum, Equinovarus, Postaxial hexadactyly on left foot, Onydhodystrophy': ['Pes planus', 'Limited elbow movement', 'Equinovarus deformity', 'Postaxial foot polydactyly', 'Congenital onychodystrophy'], 
 'Marfanoid habitus, with arachnodactyly, Genu valgum, Metatarsus varus': ['Disproportionate tall stature', 'Genu valgum', 'Metatarsus varus'],
 'Genu valgum': 'Genu valgum'
}
other_physicalMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=other_physical_d)
other_physicalMapper.preview_column(dft['Other physical'])
column_mapper_d['Other physical'] = other_physicalMapper

In [35]:
facial_features_d = {'Arched eyebrow': 'Highly arched eyebrow',
 'High andprominentforehead': 'High forehead',
 'Frontal bossing': 'Frontal bossing',
 'Asymmetric occipital plagiocephaly': 'Posterior plagiocephaly',
 'Triangular face': 'Triangular face',
 'Low anterior hairline': 'Low anterior hairline',
 'Prominent eyes': 'Proptosis',
 'Small ears': 'Microtia',
 'low-set ears': 'Low-set ears',
 'Wide nasal bridge': 'Wide nasal bridge',
 'Short philtrum High - archedpalate Downturned of corners ofmouth Prominent chin': 'Mandibular prognathia',
 'Thin upslanted eyebrows': 'Thin eyebrows',
 'Upslanted palpebral fissures': 'Upslanted palpebral fissure',
 'Deeply set eyes': 'Deeply set eye',
 'Buccal hypotonia': 'Hypotonia',
 'High-arched palate': 'High palate',
 #'Serious gaze': 'PLACEHOLDER',
 'Prominent chin': 'Mandibular prognathia',
 'epicanthus': 'Epicanthus',
 'Low-set ears': 'Low-set ears',
 'Long philtrum': 'Long philtrum',
 'Downturned of corners of mouth': 'Downturned corners of mouth',
 #'Chin crease': 'PLACEHOLDER',
 #'Aquiline nose': 'PLACEHOLDER',
 'Retrognathia': 'Retrognathia',
 'Protuding ears': 'Protruding ears',
 'High arched palate': 'High palate',
 'Malar hypoplasia': 'Malar flattening',
 'Long face': 'Long face',
 'Horizontal eyebrows': 'Horizontal eyebrow',
 'Bilateral epicanthus': 'Epicanthus',
 'Posteriorly rotated ears': 'Posteriorly rotated ears',
 #'Curved upper lip': 'PLACEHOLDER',
 'Hypertelorism': 'Hypertelorism',
 'Mild micrognathia': 'Micrognathia',
 'Smooth philtrum': 'Smooth philtrum',
 'Thin lips': 'Thin vermilion border',
 'Gingival hyperplasia': 'Gingival overgrowth',
 'Low set ears': 'Low-set ears',
 'Brachycephaly': 'Brachycephaly',
 'Left preauricular pit': 'Preauricular pit',
 'Narrow ear canals': 'Stenosis of the external auditory canal',
 'Thin upper lip vermilion': 'Thin upper lip vermilion',
 'Mildly broad / low posterior hairline': 'Low posterior hairline',
 'Downslanting palpebral fissures': 'Downslanted palpebral fissures',
 'Strabismus': 'Strabismus',
 'Maxillar hypoplasia': 'Maxillary hypoplasia',
 'Synophris': 'Synophrys',
 'Highly arched eyebrow': 'Highly arched eyebrow',
# 'Tubular nose': 'PLACEHOLDER',
 'with bulbous nasal tip': 'Bulbous nose',
 'Thin upper lips': 'Thin upper lip vermilion',
# 'Clinical suspicion of Cornelia de Lange syndrome': 'PLACEHOLDER',
 'Thin upper lip': 'Thin upper lip vermilion',
 'Short chin': 'Short chin',
 'Full cheeks': 'Full cheeks',
 'Epicanthal folds': 'Epicanthus',
 'Telecanthus': 'Telecanthus',
 'Oval face': 'Oval face',
 'Thick lips': 'Thick vermilion border',
 'Pointed chin': 'Pointed chin',
 'Long nose': 'Long nose',
 'and thin nose': 'Narrow nose',
 'Microretrognathism': 'Micrognathnia',
 'Downslanted palpebral fissures': 'Downslanted palpebral fissures',
 'with facial asymmetry': 'Facial asymmetry',
# 'and tubular nose': 'PLACEHOLDER',
 'and smooth philtrum': 'Smooth philtrum',
 'Depressed nasal bridge': 'Depressed nasal bridge',
 'Midly downturned corners of the mouth': 'Downturned corners of mouth'}
facial_featuresMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=facial_features_d)
facial_featuresMapper.preview_column(df['Facial features'])
#column_mapper_d['Facial features'] = facial_featuresMapper

KeyError: 'Facial features'

In [37]:
cutaneous_abnormalities_d = {'Stretch marks': 'Striae distensae',
 'Truncal and upper limb hyper trichosis, Palmar erythema': ['Thoracic hypertrichosis', 'Generalized hypertrichosis' 'Erythema'],
 'Hirsutism': 'Hirsutism',
}
cutaneous_abnormalitiesMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=cutaneous_abnormalities_d)
cutaneous_abnormalitiesMapper.preview_column(dft['Cutaneous abnormalities'])
column_mapper_d['Cutaneous abnormalities'] = cutaneous_abnormalitiesMapper

In [38]:
additional_clinical_features_d = {'Missing teeth, Endometriosis, Mixed malignant germ cell tumor of the left ovary': ['Abnormal number of permanent teeth', 'Endometriosis', 'Dysgerminoma'],
 'Relative pain in sensitivity': 'Pain insensitivity',
 'Relative pain insensitivity': 'Pain insensitivity',
 #'No': 'PLACEHOLDER',
 'Unexplained episodes of thrombocytosis': 'Thrombocytosis',
 'Sacro - coccygeal dimple': 'Sacral dimple',
 'Severe constipation': 'Constipation',
 'Sleeping problems': 'Sleep abnormality',
 'Sparse hair, 2 hemangiomas': ['Sparse hair', 'Hemangioma'],
 'Moebius syndrome': 'Congenital facial diplegia',
 'Bifid uvula': 'Bifid uvula'}
additional_clinical_featuresMapper = OptionColumnMapper(concept_recognizer=hpo_cr, option_d=additional_clinical_features_d)
additional_clinical_featuresMapper.preview_column(dft['Additional clinical features'])
column_mapper_d['Additional clinical features'] = additional_clinical_featuresMapper

In [41]:
import re
def decode_age(age):
    year_month_pattern = '(\d+) years and (\d+) month' # note we do not need to match final s
    year_pattern = '(\d+) years'
    month_pattern = '(\d+) months'
    year_month = re.search(year_month_pattern, age)
    if year_month:
        y = year_month.group(1)
        m = year_month.group(2)
        return f"P{y}Y{m}M"
    year = re.search(year_pattern, age)
    if year:
        y = year.group(1)
        return f"P{y}Y"
    month = re.search(month_pattern, age)
    if month:
        m = month.group(1)
        return f"P{m}M"
    # if we get here, then we failed to match - throw an error so we notice
    raise ValueError(f"Could not decode {age}")
    

dft['isoage'] = dft['Age at last examination ( Year + month )'].apply(lambda x: decode_age(x))

In [48]:
ageMapper = AgeColumnMapper.iso8601(column_name='isoage')
#ageMappper.preview_column(dft['isoage'])

In [50]:
sexMapper = SexColumnMapper(male_symbol='Male', female_symbol='Female', column_name='Sex')
#sexMapper.preview_column(dft['Sex'])

In [44]:
srsf1_transcript = "NM_006924.5"
vvalidator = VariantValidator(genome_build='hg38', transcript=srsf1_transcript)
dft['NM_006924.5'] = dft['Transcript ( NM_006924.5 )'].str.replace(" ","")
var_d = {}

for v in dft['NM_006924.5'].unique():
    print(v)
    var = vvalidator.encode_hgvs(v)
    var.set_heterozygous()
    var_d[v] = var
print(f"Retrieved {len(var_d)} unique variants from Variant Validator")

c.377_378del
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.377_378del/NM_006924.5?content-type=application%2Fjson
c.478G>A
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.478G>A/NM_006924.5?content-type=application%2Fjson
c.579dup
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.579dup/NM_006924.5?content-type=application%2Fjson
c.82C>T
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.82C>T/NM_006924.5?content-type=application%2Fjson
c.548A>G
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.548A>G/NM_006924.5?content-type=application%2Fjson
c.119G>T
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.119G>T/NM_006924.5?content-type=application%2Fjson
c.97G>T
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_006924.5%3Ac.97G

In [52]:
varMapper = VariantColumnMapper(variant_column_name="NM_006924.5",
                               variant_d=var_d,
                               default_genotype="heterozygous")

In [54]:
encoder = CohortEncoder(df=dft, 
                        hpo_cr=hpo_cr, 
                        column_mapper_d=column_mapper_d, 
                        individual_column_name="index", 
                        agemapper=ageMapper, 
                        sexmapper=sexMapper,
                        variant_mapper=varMapper,
                        metadata=metadata)
dee28 = Disease(disease_id='OMIM:616211', disease_label='Developmental and epileptic encephalopathy 28')
encoder.set_disease(dee28)

individuals = encoder.get_individuals()

In [56]:
cvalidator = CohortValidator(cohort=individuals, ontology=hpo_ontology, min_hpo=1)
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_html()))

ID,Level,Category,Message,HPO Term
PMID_37071997_Individual_10,ERROR,MALFORMED_LABEL,Invalid label 'Chewing difficulty' found for Impaired mastication (HP:0005216),Impaired mastication (HP:0005216)


In [None]:
# This means we need to go back and change the label from 'Chewing difficulty' to "Impaired mastication"
# Then continue with the 
# table = PhenopacketTable(individual_list=individual_list, metadata=metadata)
# display(HTML(table.to_html()))
# and
# Individual.output_individuals_as_phenopackets(individual_list=individual_list, 
#                                              metadata=metadata)