# ZMYM3 individuals

In [1]:
# Notebook-wide imports and display configuration.
import pandas as pd
from IPython.display import display, HTML
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
from collections import defaultdict  # NOTE(review): not referenced in any visible cell — confirm it is still needed
# The pyphetools names used in later cells (HpoParser, CaseTemplateEncoder, VariantManager,
# CohortValidator, AllelicRequirement, QcVisualizer, IndividualTable) are not imported
# explicitly anywhere visible, so they enter the namespace through these wildcard imports.
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
# Print the pyphetools version in use for this run.
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.54


In [2]:
# Parse the HPO from a JSON file located one directory above this notebook.
parser = HpoParser(hpo_json_file="../hp.json")
# Concept recognizer handed to CaseTemplateEncoder in a later cell.
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
# Ontology object handed to CohortValidator in a later cell.
hpo_ontology = parser.get_ontology()
# ORCID identifier passed to CaseTemplateEncoder as `created_by`.
created_by="ORCID:0000-0002-0736-9199"
print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
# Read the template spreadsheet (path is relative to this notebook).
df = pd.read_excel("input/ZMYM3_individuals.xlsx")
# Preview the first two rows; in the rendered output the first row holds column
# descriptors ('str', 'optional str', HPO ids) rather than data for an individual.
df.head(2)

Unnamed: 0,PMID,title,individual_id,comment,disease_id,disease_label,transcript,allele_1,allele_2,variant.comment,...,HPO,Hypospadias,Horseshoe kidney,Enuresis nocturna,Cupped ear,Attention deficit hyperactivity disorder,Right aortic arch,Bicuspid aortic valve,Sleep abnormality,"Intellectual disability, moderate"
0,str,str,str,optional str,str,str,str,str,str,optional str,...,na,HP:0000047,HP:0000085,HP:0010677,HP:0000378,HP:0007018,HP:0012020,HP:0001647,HP:0002360,HP:0002342
1,PMID:24721225,X-exome sequencing in Finnish families with intellectual disability--four novel mutations and two novel syndromic phenotypes,Patient III-1,,OMIM:301111,"Intellectual developmental disorder, X-linked 112",NM_005096.3,c.1321C>T,na,R441W,...,na,P0D,excluded,observed,observed,observed,excluded,observed,observed,observed


In [4]:
# Build the encoder from the template table, the HPO concept recognizer and the curator id.
encoder = CaseTemplateEncoder(df=df, hpo_cr=hpo_cr, created_by=created_by)
# Individuals produced by the encoder; variants are attached to them in a later cell.
individuals = encoder.get_individuals()

Created encoders for 23 fields


In [5]:
# Set up variant handling for ZMYM3 on transcript NM_005096.3. Only the
# `allele_1` column is registered; rows are keyed by `individual_id`.
vmanager = VariantManager(
    df=df,
    individual_column_name="individual_id",
    gene_symbol="ZMYM3",
    transcript="NM_005096.3",
    allele_1_column_name="allele_1",
)

In [6]:
# Display the mapped / unmapped allele counts; the rendered output lists the alleles per status.
vmanager.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,22,"c.2193G>C, c.1322G>A, c.507A>T, c.3409T>A, c.3820C>T, c.905G>A, c.1183C>A, c.3518G>A, c.1360T>C, c.2794A>G, c.3880C>T, c.3638T>C, c.2255A>G, c.1321C>T, c.205G>A, c.3371G>A, c.721G>A, c.1192C>T, c.3605T>A, c.3970C>T, c.4029G>A, c.671_674dup"
1,unmapped,0,


In [7]:
# Attach the variants held by the manager to the individuals created by the encoder.
# NOTE(review): the rendered individual table further down shows MALE individuals with an
# X-linked disorder (OMIM:301111) and genotype "heterozygous" — confirm whether a
# hemizygous genotype should be requested here for male individuals.
vmanager.add_variants_to_individuals(individuals)

In [9]:
# Validate the cohort against the ontology, requiring at least one HPO term per
# individual and a mono-allelic genotype, then render the QC summary as HTML.
cvalidator = CohortValidator(
    cohort=individuals,
    ontology=hpo_ontology,
    min_hpo=1,
    allelic_requirement=AllelicRequirement.MONO_ALLELIC,
)
qc = QcVisualizer(cohort_validator=cvalidator)
display(
    HTML(qc.to_summary_html())
)

In [10]:
# Rebind `individuals` to the list returned by the validator's
# get_error_free_individual_list(); the export cell that follows reads this name,
# so it operates on this filtered list rather than on the encoder's original list.
individuals = cvalidator.get_error_free_individual_list()
# Render the remaining individuals (disease, genotype, phenotypic features) as an HTML table.
table = IndividualTable(individuals)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
Patient III-1 (MALE; P15Y6M),"Intellectual developmental disorder, X-linked 112 (OMIM:301111)",NM_005096.3:c.1321C>T (heterozygous),"P0D: Hypospadias (HP:0000047) Enuresis nocturna (HP:0010677); Cupped ear (HP:0000378); Attention deficit hyperactivity disorder (HP:0007018); Bicuspid aortic valve (HP:0001647); Sleep abnormality (HP:0002360); Intellectual disability, moderate (HP:0002342); excluded: Horseshoe kidney (HP:0000085); excluded: Right aortic arch (HP:0012020)"
Patient III-2 (MALE; P8Y),"Intellectual developmental disorder, X-linked 112 (OMIM:301111)",NM_005096.3:c.1321C>T (heterozygous),"P0D: Hypospadias (HP:0000047); Horseshoe kidney (HP:0000085) Enuresis nocturna (HP:0010677); Cupped ear (HP:0000378); Attention deficit hyperactivity disorder (HP:0007018); Right aortic arch (HP:0012020); Bicuspid aortic valve (HP:0001647); Sleep abnormality (HP:0002360); Intellectual disability, moderate (HP:0002342)"
Patient III-3 (MALE; P7Y),"Intellectual developmental disorder, X-linked 112 (OMIM:301111)",NM_005096.3:c.1321C>T (heterozygous),"P0D: Hypospadias (HP:0000047) Cupped ear (HP:0000378); Sleep abnormality (HP:0002360); Intellectual disability, moderate (HP:0002342); excluded: Horseshoe kidney (HP:0000085); excluded: Enuresis nocturna (HP:0010677); excluded: Right aortic arch (HP:0012020); excluded: Bicuspid aortic valve (HP:0001647)"


In [11]:
# Write the validated individuals out as GA4GH phenopackets; the rendered output
# reports the count and the target directory ("phenopackets").
encoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 3 GA4GH phenopackets to the directory phenopackets
