<h1>Sulfite Oxidase Deficiency SUOX</h1>
<p>Data from <a href="https://pubmed.ncbi.nlm.nih.gov/36303223/" target="_blank">Li JT, Chen ZX, Chen XJ, Jiang YX. Mutation analysis of SUOX in isolated sulfite oxidase deficiency with ectopia lentis as the presenting feature: insights into genotype-phenotype correlation. Orphanet J Rare Dis. 2022 Oct 27;17(1):392. doi: 10.1186/s13023-022-02544-x. PMID: 36303223; PMCID: PMC9615255.</a></p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
import numpy as np
import os
import sys
import re

from pyphetools.creation import *


In [2]:
# Curator identifier handed to MetaData as `created_by`.
CREATOR_ORCID = "ORCID:0000-0003-2598-6622"

# One parser instance supplies both the concept recognizer and the HPO
# version string that is written into the metadata below.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()

metadata = MetaData(created_by=CREATOR_ORCID)
metadata.default_versions_with_hpo(version=hpo_version)

In [3]:
# The supplemental table has a two-row header (header=[0, 1]), so pandas builds
# two-level column labels. The sheet row with 0-based index 2 is skipped and
# 'n.a.' cells are read as NaN.
df = pd.read_excel('input/Li-SUOX-SupplTable-35.xlsx', skiprows=[2], header=[0,1], na_values=['n.a.'])


def _flatten_header(columns):
    """Collapse two-level column labels into unique single-level names.

    Args:
        columns: iterable of (top, sub) label pairs, e.g. ``df.columns.values``
            of a frame read with a two-row header.

    Returns:
        list[str]: one flat name per input column, in the same order.
        If the sub-label contains 'Unnamed' (the text pandas fills in for an
        empty second-row header cell) the top label is used alone; otherwise
        the two labels are joined with '_'. A name that was already issued
        gets the suffix '_1', then '_2', ... until it is unique, so repeated
        headers never collapse onto the same column name.
    """
    flat_names = []
    taken = set()
    for col in columns:
        base = col[0] if 'Unnamed' in col[1] else '_'.join(col)
        candidate = base
        suffix = 0
        # The first repeat becomes '<base>_1', as before; later repeats keep
        # counting up instead of reusing '_1' and producing a duplicate label.
        while candidate in taken:
            suffix += 1
            candidate = f"{base}_{suffix}"
        taken.add(candidate)
        flat_names.append(candidate)
    return flat_names


newCols = _flatten_header(df.columns.values)
df.columns = newCols



Unnamed: 0,Proband ID,Resource (PMID),Ethnicity,Gender,Parental consanguity,Age at onset (months),Variants_Nucleotide,Variants_Amino acid,Status,Clinical severity_Typical type/Mild type,...,Plasma_Homocys (umol/L),Plasma_Cys (umol/L),Plasma_UA (umol/L),Urine_Sulfite (mg/L),Urine_Thiosulfate,Urine_SSC (umol/mmolCr),Urine_Taurine (mmol/molCr),Urine_XA,Urine_HypoXA,Urine_UA
0,1,9050047,EUR,M,No,0.0,c.(433delC); (433delC),p.(Q145Sfs*16); (Q145Sfs*16),Homo,T,...,,2,,20-25,,320umol/L,95,,,
1,2,9600976,EUR,F,Yes,5.0,c.(650G>A); (650G>A),p.(R217Q); (R217Q),Homo,T,...,,,,0.108-0.211,0.297-1.632mmol/L,240umol/L,,0.04mmol/L,0.05mmol/L,0.14mmol/L
2,3,10519592,NAM,M,No,0.0,c.(794C>A); (1280C>A),p.(A265D); (S427*),Com het,T,...,,,normal,80-100,,690umol/L,,normal,normal,normal
3,4,12112661,,,Yes,,c.(733_736delCTTT); (733_736delCTTT),p.(L245Pfs*27); (L245Pfs*27),Homo,T,...,,,,,,,,,,
4,5,12112661,,,Yes,,c.(284_285insC); (1126C>T),p.(E97*); (R376C),Com het,T,...,,,,,,,,,,
5,6,12112661,,,Yes,,c.(772A>C); (772A>C),p.(I258L); (I258L),Homo,T,...,,,,,,,,,,
6,7,12112661,,,Yes,,c.(803G>A); (803G>A),p.(R268Q); (R268Q),Homo,T,...,,,,,,,,,,
7,8,12112661,,,Yes,,c.(1200C>G); (1200C>G),p.(Y400*); (Y400*),Homo,T,...,,,,,,,,,,
8,9,12112661,,,Yes,,c.(1261C>T); (1261C>T),p.(Q421*); (Q421*),Homo,T,...,,,,,,,,,,
9,10,12112661,,,Yes,,c.(1084G>A); (1084G>A),p.(G362S); (G362S),Homo,T,...,,,,,,,,,,


In [4]:
# Replace missing sex entries with '' so they can be matched by the
# `unknown_symbol=''` given to SexColumnMapper further down.
gender_filled = df['Gender'].fillna(value='')
df['Gender'] = gender_filled

In [5]:
def _onset_months_to_years_text(months):
    """Turn one 'Age at onset (months)' cell into a years value as text.

    Returns None when the cell's string form contains the letter 'n'
    (this covers NaN, which prints as 'nan'), '0' for an empty string,
    and otherwise the string form of ``months / 12``.
    """
    if 'n' in str(months):
        return None
    if months == '':
        return '0'
    return str(months / 12)


new_age = [_onset_months_to_years_text(months) for months in df['Age at onset (months)']]

df['NewAgeCol'] = new_age

In [6]:
# Genome build and transcript handed to the variant mapper.
genome = 'hg38'
transcript = 'NM_001032386.2'

# One cell may hold several variants separated by ';'. The genotype is taken
# from the 'Status' column, with 'heterozygous' given as the default.
# NOTE(review): the flattened header displayed after loading lists this column
# as 'Variants_Nucleotide', while 'Variant_Nucleotide' is requested here.
# Confirm that a column with exactly this name exists on a fresh
# Restart & Run All (it may have been created in a cell that is no longer present).
varMapper = VariantColumnMapper(
    assembly=genome,
    transcript=transcript,
    column_name='Variant_Nucleotide',
    genotype_column='Status',
    default_genotype='heterozygous',
    delimiter=';',
)

'Status'

In [7]:
# Registry of column name -> mapper that is later passed to CohortEncoder.
column_mapper_d = defaultdict(ColumnMapper)

# Flattened spreadsheet column -> [HPO label, HPO id].
# Keys must match the column names exactly, including the source's spelling.
items = {
    'Neurological manifestations_Developmental delay': ['Neurodevelopmental delay', 'HP:0012758'],
    'Neurological manifestations_Regression': ['Cognitive regression', 'HP:0034332'],
    'Neurological manifestations_Seizure': ['Seizure', 'HP:0001250'],
    'Neurological manifestations_Extrapyramidal symptoms': ['Abnormality of extrapyramidal motor function', 'HP:0002071'],
    'Neurological manifestations_Hypertonia': ['Hypertonia', 'HP:0001276'],
    'Neurological manifestations_Hypotonia': ['Hypotonia', 'HP:0001252'],
    'Neurological manifestations_Microcephaly': ['Microcephaly', 'HP:0000252'],
    'Opthalmic manifestations_Ectopia lentis': ['Ectopia lentis', 'HP:0001083'],
}

# Cells containing '+' are treated as observed and '-' as excluded.
item_column_mapper_d = hpo_cr.initialize_simple_column_maps(
    column_name_to_hpo_label_map=items,
    observed='+',
    excluded='-',
)
print(f"We created {len(item_column_mapper_d)} simple column mappers")

# Transfer the generated mappers into column_mapper_d
column_mapper_d.update(item_column_mapper_d)

We created 8 simple column mappers


In [8]:
# Identifiers used when building the encoder below.
individual_column_name = 'Proband ID'
pmid = "PMID:36303223"

# Age mapper reads the years-as-text column derived from the onset months.
# NOTE(review): the encoding type is passed as the string literal
# 'AgeEncodingType.YEAR_AND_MONTH', not as an enum member — confirm that
# AgeColumnMapper accepts this form.
ageMapper = AgeColumnMapper('AgeEncodingType.YEAR_AND_MONTH', 'NewAgeCol')
ageMapper.preview_column(df['NewAgeCol'])

# Sex mapper: 'M' / 'F' as in the table, '' for rows without a recorded sex.
sexMapper = SexColumnMapper(
    male_symbol='M',
    female_symbol='F',
    column_name='Gender',
    unknown_symbol='',
)
sexMapper.preview_column(df['Gender'])

# Bring the table, HPO column mappers, age/sex/variant mappers and metadata
# together in a single cohort encoder.
encoder = CohortEncoder(
    df=df,
    hpo_cr=hpo_cr,
    column_mapper_d=column_mapper_d,
    individual_column_name=individual_column_name,
    agemapper=ageMapper,
    sexmapper=sexMapper,
    variant_mapper=varMapper,
    metadata=metadata,
    pmid=pmid,
)

encoder.set_disease(disease_id='ORPHA:833', label='Encephalopathy due to sulfite oxidase deficiency')

In [9]:
# Directory handed to the encoder as the output location for the phenopackets.
# The recorded output of this cell shows VariantValidator REST URLs, so the
# call presumably needs network access — confirm before running offline.
output_dir = 'phenopackets'
encoder.output_phenopackets(outdir=output_dir)

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.433delC/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.650G>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.794C>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.1280C>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.733_736delCTTT/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.1126C>T/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_001032386.2%3Ac.284_285insC/NM_001032