# KDM6B, Rots D et al.

Data taken from [Rots D, et al. The clinical and molecular spectrum of the KDM6B-related neurodevelopmental disorder. Am J Hum Genet. 2023](https://pubmed.ncbi.nlm.nih.gov/37196654/).
The data were extracted from Table S1, "Detailed clinical information of the cases with the (likely) pathogenic KDM6B variants".

In [19]:
import phenopackets as PPkt
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
from pyphetools.creation import *
from pyphetools.visualization import *
import importlib.metadata
from IPython.display import HTML, display
# Look up the installed pyphetools release and print it, so the saved output
# records which version produced the results below.
__version__ = importlib.metadata.version("pyphetools")
print(f"Using pyphetools version {__version__}")

Using pyphetools version 0.8.3


In [7]:
# Citation details of the source publication.
PMID = "PMID:37196654"
title = "The clinical and molecular spectrum of the KDM6B-related neurodevelopmental disorder"

# HPO resources: the concept recognizer is used by the column mappers below,
# and the HPO version is recorded in the metadata.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()

# Metadata describing the curator and the publication.
metadata = MetaData(
    created_by="ORCID:0000-0002-0736-9199",
    pmid=PMID,
    pubmed_title=title,
)
metadata.default_versions_with_hpo(version=hpo_version)

In [12]:
# Load the supplementary spreadsheet (presumably Table S1 of the publication — confirm).
# As the preview below shows, each row is a field and each column an individual.
df = pd.read_excel('input/Rots_2023_PMID_37196654.xlsx')

In [13]:
# Preview the raw table as loaded (fields in rows, individuals in columns).
df.head()

Unnamed: 0,Field,Individual 1,Individual 2,Individual 3,Individual 5,Individual 6,Individual 7,Individual 8,Individual 9,Individual 11,...,Individual 4,Individual 10,Individual 34,Individual 38,Individual 44 (DDD_286674),Individual 49 (DEASD_0146_001),Individual 50 (DEASD_0129_001),Individual 54 (SSC_13675.p1),Individual 58 (DDD_305030),Individual 59 (DDD_306396)
0,Sex,F,F,M,M,M,F,M,M,F,...,M,M,M,M,F,M,M,M,M,M
1,"Age, years",16,10,9,25,13y2m,9y6m,10,6y6m,19,...,14,4,11y,6,3,7y3m,8y7m,,,
2,Cohort type,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Clinical testing,...,Clinical testing,Clinical testing,Clinical testing,Clinical testing,Research and clinical testing,Research cohort,Research cohort,Research cohort,Research cohort,Research cohort
3,Mutation (NM_),1,2,3,4,5,6,7,8,9,...,64,65,66,67,68,69,70,71,72,73
4,cDNA change (ENST00000254846.9 or NM_001080424.2),c.1014delC,c.1085_1088del,c.654_655del,c.1439dup,c.2598delC,c.4500C>A,c.403C>T\n\n,c.4737+1G>A,c.3288_3291delTGAG,...,c.4696C>A,c.3762_3764del,c.4118T>C,c.4193C>A,c.4724G>C,c.4174G>A,c.4186T>A,c.4187_4189del,c.4187_4189del,c.4222T>C


In [14]:
# Flip the table so that each individual becomes a row and each field a column.
transposed = df.transpose()
# The first transposed row holds the field names; keep it to use as the header.
header_row = transposed.iloc[0]
# Remove that header row from the data (without mutating in place), then label the columns.
dft = transposed.drop(index=transposed.index[0])
dft.columns = header_row
dft.head()

Field,Sex,"Age, years",Cohort type,Mutation (NM_),cDNA change (ENST00000254846.9 or NM_001080424.2),Amino acid change,Variant Type (PTV or PAV),Inheritance,Heterozygous/Homozygous,Additional findings of genetic testing,...,Other,Other.1,Skin hyperlaxity,Genitourinary abnormalities,Cryptorchidism,Other medication received,Other.2,NaN,NaN.1,NaN.2
Individual 1,F,16,Clinical testing,1,c.1014delC,p.(Arg340Alafs*147),PTV,Maternal,Heterozygous,No,...,,,No,No,,Not reported,Nasal speech,,,
Individual 2,F,10,Clinical testing,2,c.1085_1088del,p.(Glu362Alafs*124),PTV,Paternal,Heterozygous,No,...,,,No,No,,No,2 Cafe-au-lait spots,Night incontinence,commonly head and abdominal pain,
Individual 3,M,9,Clinical testing,3,c.654_655del,p.(Glu220Glyfs*16),PTV,de novo,Heterozygous,Beta-thalasemia carrier,...,,,No,No,No,"Melatonine(sleep problems); Prednisolon, budesonide, salbutamol (asthma); macrogol (constipations); esomeprazole (GERD)",1 Cafe-au-lait spot,Adenotomy due to the hyperplasia,Bronchial asthma,Verry common airway infections
Individual 5,M,25,Clinical testing,4,c.1439dup,p.(Pro481Thrfs*29),PTV,de novo,Heterozygous,No,...,"Eats/drinks no cow's milk, no gluten and little soya. No allergy but seems sensitive to these products",,No,No,No,Vitamins and feeding supplements through alternative doctor,At metabolic screening increased essential amino acids alanin amongst others,,,
Individual 6,M,13y2m,Clinical testing,5,c.2598delC,p.(Ser867Argfs*27),PTV,de novo,Heterozygous,No,...,,,No,phimosis,No,No,tongue frenulum IQ because of dyslalia. Double appical hair whorl,,,


In [16]:
# Expose the row index (the individual labels) as a regular column named 'patient_id'.
dft = dft.assign(patient_id=dft.index)
dft.head()

Field,Sex,"Age, years",Cohort type,Mutation (NM_),cDNA change (ENST00000254846.9 or NM_001080424.2),Amino acid change,Variant Type (PTV or PAV),Inheritance,Heterozygous/Homozygous,Additional findings of genetic testing,...,Other,Skin hyperlaxity,Genitourinary abnormalities,Cryptorchidism,Other medication received,Other.1,NaN,NaN.1,NaN.2,patient_id
Individual 1,F,16,Clinical testing,1,c.1014delC,p.(Arg340Alafs*147),PTV,Maternal,Heterozygous,No,...,,No,No,,Not reported,Nasal speech,,,,Individual 1
Individual 2,F,10,Clinical testing,2,c.1085_1088del,p.(Glu362Alafs*124),PTV,Paternal,Heterozygous,No,...,,No,No,,No,2 Cafe-au-lait spots,Night incontinence,commonly head and abdominal pain,,Individual 2
Individual 3,M,9,Clinical testing,3,c.654_655del,p.(Glu220Glyfs*16),PTV,de novo,Heterozygous,Beta-thalasemia carrier,...,,No,No,No,"Melatonine(sleep problems); Prednisolon, budesonide, salbutamol (asthma); macrogol (constipations); esomeprazole (GERD)",1 Cafe-au-lait spot,Adenotomy due to the hyperplasia,Bronchial asthma,Verry common airway infections,Individual 3
Individual 5,M,25,Clinical testing,4,c.1439dup,p.(Pro481Thrfs*29),PTV,de novo,Heterozygous,No,...,,No,No,No,Vitamins and feeding supplements through alternative doctor,At metabolic screening increased essential amino acids alanin amongst others,,,,Individual 5
Individual 6,M,13y2m,Clinical testing,5,c.2598delC,p.(Ser867Argfs*27),PTV,de novo,Heterozygous,No,...,,No,phimosis,No,No,tongue frenulum IQ because of dyslalia. Double appical hair whorl,,,,Individual 6


In [17]:
# Configure automatic column mapping: cells equal to "Yes" are treated as observed
# and cells equal to "No" as excluded.
# NOTE(review): presumably column headers are matched to HPO terms via hpo_cr — confirm in the pyphetools docs.
generator = SimpleColumnMapperGenerator(df=dft, observed="Yes", excluded="No", hpo_cr=hpo_cr)

In [18]:
# Run the automatic mapping attempt and keep its result.
# NOTE(review): presumably a dict of column name -> column mapper — confirm.
column_d = generator.try_mapping_columns()

In [21]:
# Render the generator's HTML summary listing which columns were mapped and which were not.
display(HTML(generator.to_html()))

Result,Columns
Mapped,Motor delay; Intellectual disability; Autism spectrum disorder; Sleep disturbances; Hypotonia; Spasticity; Joint hypermobility; Syndactyly; Pectus excavatum; Strabismus; Recurrent ear infections; Constipation; Cryptorchidism
Unmapped,"Sex; Age, years; Cohort type; Mutation (NM_); cDNA change (ENST00000254846.9 or NM_001080424.2); Amino acid change; Variant Type (PTV or PAV); Inheritance; Heterozygous/Homozygous; Additional findings of genetic testing; Other affected relatives; Pregnancy/delivery; Complications of Pregnancy/Delivery; Gestational age, weeks; Birth weight, g (SD); Growth; Height, cm (SD); Weight, kg(SD); Head circumference, cm(SD); Age at folow-up/measurements, years; Neurodevelopment; Language/speech delay; First words, months; First steps, months; IQ profile; nan; nan; Behavior problems; Psychosis / Schizophrenia; Use of psychiatric drugs; Other; Neurological; Seizures / Epilepsy; Dystonia, if present - type and age of onset; Other neurological/movement issues; Brain MRI findings; Musculoskeletal/extremities; Vertebral abnormalities (Scoliosis, kyphosis etc).; Hand /foot/ finger abnormalities; Other; Dysmorphism; Dysmorphic features; Lip/palate cleft; Eyes/visual problems; Hypermetropia/myopia; Other; Ear/ hearing problems; Hearing; Other; Cardiovascular; Congenital heart disease; Other; Gastrointestinal; Neonatal feeding difficulties; Yes; Other; Other; Skin hyperlaxity; Genitourinary abnormalities; Other medication received; Other; nan; nan; nan; patient_id"


In [24]:
# List the distinct free-text entries of this unmapped column to decide how to map them by hand.
dft["Behavior problems"].unique()

array(['Bossy, Agressive (verbally)', 'No',
       'Irritability, Anger, anxiety (associated with obstipation periods); No contact except parents and physician',
       'Yes', 'ADHD', 'AHDS; Aggression, problems in social interaction ',
       'Tantrums and inattention',
       'ADHD, aggression, problems in social interaction ',
       'AHDS, aggressive, impulsive behaivior ', 'Anxiety, aggression',
       'probable ADHD', 'Stubborn, "lazy"', 'Agitation,agressivity',
       'Aggressive behavior, noncompliance, physical aggression, poor play skills ',
       'Anxiety',
       'Early and atypical depression, attention deficit, anxiety, atypical sensory',
       'Yes, Aggression', 'ADHD, anxiety',
       'Poor social skills, stereotypic behaviour.',
       'stubborn, aggressive, tantrums', 'Short attention span',
       'Rigid behaviour', 'ADHD, aggressive behavior', nan,
       'hyperactivity', 'attention deficit', ' impulsive', 'ADHD  ',
       'Hyperactivity'], dtype=object)

In [27]:
# Hand-written mapping of "Behavior problems" cell contents to HPO labels.
# Entries that appear in neither dictionary yield no term in the preview below.
option_d = {
    'Bossy, Agressive (verbally)': 'Aggressive behavior',
    'Yes': 'Atypical behavior',
}
# A plain "No" is recorded as an explicitly excluded finding.
excluded_d = {
    'No': 'Atypical behavior',
}
behaviorMapper = OptionColumnMapper(
    concept_recognizer=hpo_cr,
    option_d=option_d,
    excluded_d=excluded_d,
)
behaviorMapper.preview_column(dft["Behavior problems"])

Unnamed: 0,terms
0,HP:0000718 (Aggressive behavior/observed)
1,HP:0000708 (Atypical behavior/excluded)
2,
3,HP:0000708 (Atypical behavior/observed)
4,
...,...
68,HP:0000708 (Atypical behavior/excluded)
69,
70,
71,


In [32]:
# NOTE(review): the return value is stored in `result` but never displayed in this cell;
# presumably it is meant to be inspected (e.g. printed) — confirm against the pyphetools docs.
result = OptionColumnMapper.autoformat(df=dft, concept_recognizer=hpo_cr, delimiter=";,")

In [40]:
# Check which symbols encode sex in the table before configuring the sex mapper.
dft['Sex'].unique()

array(['F', 'M'], dtype=object)

In [41]:
# Map the "Sex" column, where "M" denotes male and "F" denotes female.
sexMapper = SexColumnMapper(male_symbol="M", female_symbol="F", column_name="Sex")

In [42]:
# Preview how each raw "Sex" entry is translated by the mapper.
sexMapper.preview_column(dft['Sex'])

Unnamed: 0,original column contents,sex
0,F,FEMALE
1,F,FEMALE
2,M,MALE
3,M,MALE
4,M,MALE
...,...,...
68,M,MALE
69,M,MALE
70,M,MALE
71,M,MALE


In [44]:
# Collect the distinct cDNA change strings that need to be validated.
# NOTE(review): at least one entry carries trailing newlines ('c.403C>T\n\n' in the df.head() preview),
# so consumers of var_list should trim whitespace before using a value as an HGVS expression.
var_list = dft["cDNA change (ENST00000254846.9 or NM_001080424.2)"].unique()

In [46]:
# Encode every distinct cDNA change against transcript NM_001080424.2 (hg38).
# NOTE: encode_hgvs queries rest.variantvalidator.org, so this cell needs network access.
vvalidator = VariantValidator(genome_build="hg38", transcript="NM_001080424.2" )
variant_d = {}
for v in var_list:
    # Skip missing cells (e.g. NaN); only strings can be HGVS expressions.
    if not isinstance(v, str):
        continue
    # Some spreadsheet cells carry stray whitespace/newlines (e.g. 'c.403C>T\n\n'),
    # which would end up in the request URL, so validate the trimmed string.
    hgvs = v.strip()
    if not hgvs:
        continue
    vvar = vvalidator.encode_hgvs(hgvs)
    # Store the result (the original loop never filled variant_d). Register it under both
    # the raw cell value and the trimmed form so later lookups work with either key.
    variant_d[v] = vvar
    variant_d[hgvs] = vvar
    print(hgvs)
    print(vvar)

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001080424.2%3Ac.1014delC/NM_001080424.2?content-type=application%2Fjson


ConnectionError: HTTPSConnectionPool(host='rest.variantvalidator.org', port=443): Max retries exceeded with url: /VariantValidator/variantvalidator/hg38/NM_001080424.2%3Ac.1014delC/NM_001080424.2?content-type=application%2Fjson (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7ff97cfa8f40>: Failed to resolve 'rest.variantvalidator.org' ([Errno -3] Temporary failure in name resolution)"))