<h1>Sulfite Oxidase Deficiency SUOX</h1>
<p>Data from <a href="https://pubmed.ncbi.nlm.nih.gov/36303223/" target="_blank">Li JT, Chen ZX, Chen XJ, Jiang YX. Mutation analysis of SUOX in isolated sulfite oxidase deficiency with ectopia lentis as the presenting feature: insights into genotype-phenotype correlation. Orphanet J Rare Dis. 2022 Oct 27;17(1):392. doi: 10.1186/s13023-022-02544-x. PMID: 36303223; PMCID: PMC9615255.</a></p>

In [1]:
import phenopackets as php
from google.protobuf.json_format import MessageToDict, MessageToJson
from google.protobuf.json_format import Parse, ParseDict
import pandas as pd
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict
import numpy as np
import os
import sys
import re

from pyphetools.creation import *


In [2]:
# Build the HPO helpers used by the rest of the notebook. The concept recognizer
# and the ontology version are both taken from the same parser instance, so the
# version recorded in the metadata matches the ontology used for term lookup.
parser = HpoParser()
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
# Metadata attached to the encoder below; created_by is the curator's ORCID.
metadata = MetaData(created_by="ORCID:0000-0003-2598-6622")
# Registers the HPO version obtained above (presumably along with default
# versions of other resources -- confirm against the pyphetools MetaData docs).
metadata.default_versions_with_hpo(version=hpo_version)

In [3]:
# Load the supplementary table. The sheet is read with a two-row header
# (header=[0,1]), so pandas returns two-level (tuple) column labels. The line at
# 0-based index 2 is skipped and cells containing 'n.a.' are read as missing.
df = pd.read_excel('input/Li-SUOX-SupplTable-35.xlsx', skiprows=[2], header=[0,1], na_values=['n.a.'])

# Flatten the two-level column labels into plain strings:
#   * second level is a pandas placeholder ('Unnamed: ...', used when the header
#     cell is empty) -> keep only the first level
#   * otherwise -> '<first level>_<second level>'
newCols = []
for col in df.columns.values:
    if 'Unnamed' in col[1]:
        newCol = col[0]
    else:
        newCol = '_'.join(col)

    # Guarantee unique column names. The first repeat still becomes '<name>_1';
    # further repeats (or a clash with an existing '<name>_1') get the next free
    # number instead of silently producing a second identical column name.
    if newCol in newCols:
        base = newCol
        suffix = 1
        newCol = f"{base}_{suffix}"
        while newCol in newCols:
            suffix += 1
            newCol = f"{base}_{suffix}"
    newCols.append(newCol)

df.columns = newCols


In [4]:
# Replace missing Gender cells with the empty string. The SexColumnMapper created
# in a later cell is configured with unknown_symbol='', so blank entries are
# presumably meant to be encoded as "unknown sex" rather than left as NaN.
df['Gender'] = df['Gender'].fillna('')

In [5]:
def _onset_months_to_years(months):
    """Convert one 'Age at onset (months)' cell to a decimal-year string.

    Returns None when the text form of the cell contains the letter 'n'
    (this covers NaN, whose string form is 'nan', as well as any free text
    containing an 'n'), '0' for an empty string, and otherwise the value
    divided by 12, rendered with str().
    """
    if 'n' in str(months):
        return None
    if months == '':
        return str(0)
    return str(months/12)


# One converted entry per row, stored alongside the original column.
new_age = [_onset_months_to_years(cell) for cell in df['Age at onset (months)']]
df['NewAgeCol'] = new_age

In [6]:
# Settings shared by every variant in the table: genome assembly, the transcript
# that the nucleotide-level variant strings refer to, and the genotype passed to
# the mapper as its default.
genome = 'hg19'
default_genotype = 'heterozygous'
transcript = 'NM_001032386.2'

# Mapper that reads the variant string from the 'Variant_Nucleotide' column.
varMapper = VariantColumnMapper(
    assembly=genome,
    column_name='Variant_Nucleotide',
    transcript=transcript,
    genotype=default_genotype,
)

In [7]:
# Registry of column name -> mapper that is handed to the CohortEncoder.
column_mapper_d = defaultdict(ColumnMapper)

# Flattened spreadsheet column name -> [HPO label, HPO id].
# The keys are looked up as column names, so they presumably have to match the
# sheet headers exactly (including the 'Opthalmic' spelling) -- verify against
# the spreadsheet before "correcting" any of them.
items = {
    'Neurological manifestations_Developmental delay': ['Neurodevelopmental delay', 'HP:0012758'],
    'Neurological manifestations_Regression': ['Cognitive regression', 'HP:0034332'],
    'Neurological manifestations_Seizure': ['Seizure', 'HP:0001250'],
    'Neurological manifestations_Extrapyramidal symptoms': ['Abnormality of extrapyramidal motor function', 'HP:0002071'],
    'Neurological manifestations_Hypertonia': ['Hypertonia', 'HP:0001276'],
    'Neurological manifestations_Hypotonia': ['Hypotonia', 'HP:0001252'],
    'Neurological manifestations_Microcephaly': ['Microcephaly', 'HP:0000252'],
    'Opthalmic manifestations_Ectopia lentis': ['Ectopia lentis', 'HP:0001083'],
}

# Cells containing '+' are treated as observed and '-' as excluded.
item_column_mapper_d = hpo_cr.initialize_simple_column_maps(
    column_name_to_hpo_label_map=items,
    observed='+',
    excluded='-',
)
print(f"We created {len(item_column_mapper_d)} simple column mappers")

# Transfer the simple mappers into column_mapper_d
for column_name, simple_mapper in item_column_mapper_d.items():
    column_mapper_d[column_name] = simple_mapper

We created 8 simple column mappers


In [8]:
# Source publication and the column holding each individual's identifier.
pmid = "PMID:36303223"
individual_column_name = 'Proband ID'

# NOTE(review): the encoding type is passed as the *string*
# 'AgeEncodingType.YEAR_AND_MONTH', not as an AgeEncodingType member, and
# 'NewAgeCol' holds decimal years as strings (months / 12). Confirm that
# AgeColumnMapper accepts this and parses the ages as intended.
ageMapper = AgeColumnMapper('AgeEncodingType.YEAR_AND_MONTH', 'NewAgeCol')
ageMapper.preview_column(df['NewAgeCol'])

# 'M' / 'F' are the sex symbols; '' (set by the earlier fillna on 'Gender') is
# the symbol for unknown sex.
sexMapper = SexColumnMapper(
    male_symbol='M',
    female_symbol='F',
    column_name='Gender',
    unknown_symbol='',
)
sexMapper.preview_column(df['Gender'])

# Bundle the table, the HPO column mappers, and the age / sex / variant mappers
# into a single cohort encoder.
encoder = CohortEncoder(
    df=df,
    hpo_cr=hpo_cr,
    column_mapper_d=column_mapper_d,
    individual_column_name=individual_column_name,
    agemapper=ageMapper,
    sexmapper=sexMapper,
    variant_mapper=varMapper,
    metadata=metadata,
    pmid=pmid,
)

# Disease assigned via the encoder (Orphanet identifier and label).
encoder.set_disease(
    disease_id='ORPHA:833',
    label='Encephalopathy due to sulfite oxidase deficiency',
)

In [9]:
# Emit the encoded cohort with 'phenopackets' as the output directory. The
# recorded output of this cell lists VariantValidator REST URLs (one per distinct
# variant), so this step appears to require network access -- confirm before
# running offline.
encoder.output_phenopackets(outdir='phenopackets')

https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.433delC/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.650G>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.794C>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.1280C>A/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.733_736delCTTT/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.1126C>T/NM_001032386.2?content-type=application%2Fjson
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg19/NM_001032386.2%3Ac.284_285insC/NM_001032