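# MAP3K14 individuals

This notebook uses pyphetools to encode published individuals with biallelic MAP3K14 variants (Immunodeficiency 112, OMIM:620449) as GA4GH phenopackets and then derives an HPOA annotation file from them.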

In [1]:
# Notebook-wide setup: pandas, IPython rich display helpers and the pyphetools toolkit.
import pandas as pd
from IPython.display import display, HTML
pd.set_option('display.max_colwidth', None) # show entire column contents, important!
# NOTE(review): defaultdict is not referenced by any cell visible in this notebook.
from collections import defaultdict
# Wildcard imports: later cells use names such as HpoParser, CaseTemplateEncoder,
# VariantManager, CohortValidator and HpoaTableBuilder that are never imported
# explicitly, so they presumably come from these three star imports --
# TODO confirm which submodule exports which name and import them explicitly.
from pyphetools.creation import *
from pyphetools.visualization import *
from pyphetools.validation import *
import pyphetools
# Print the library version so the recorded output documents what produced it.
print(f"Using pyphetools version {pyphetools.__version__}")

Using pyphetools version 0.9.54


In [2]:
# Curator identifier; it is handed to the CaseTemplateEncoder in a later cell.
created_by = "ORCID:0000-0002-0736-9199"

# Parse the local HPO JSON file once and keep the three artefacts that the
# following cells consume: concept recognizer, release version and ontology.
parser = HpoParser(hpo_json_file="../hp.json")
hpo_cr = parser.get_hpo_concept_recognizer()
hpo_version = parser.get_version()
hpo_ontology = parser.get_ontology()

# Report the HPO release in the cell output.
print(f"HPO version {hpo_version}")

HPO version 2024-02-27


In [3]:
# Read the curated individuals spreadsheet into a DataFrame; the trailing
# expression renders a two-row preview as the cell output.
df = pd.read_excel(io="input/MAP3K14_individuals.xlsx")
df.head(n=2)

Unnamed: 0,PMID,title,individual_id,comment,disease_id,disease_label,transcript,allele_1,allele_2,variant.comment,...,T lymphocytopenia,Decreased proportion of gamma-delta T cells,Decreased proportion of class-switched memory B cells,Recurrent viral infections,Recurrent bacterial infections,BCGosis,BCGitis,Recurrent lower respiratory tract infections,Chronic mucocutaneous candidiasis,Chronic diarrhea
0,str,str,str,optional str,str,str,str,str,str,optional str,...,HP:0005403,HP:0500271,HP:0030388,HP:0004429,HP:0002718,HP:0020087,HP:0020086,HP:0002783,HP:0002728,HP:0002028
1,PMID:25406581,Biallelic loss-of-function mutation in NIK causes a primary immunodeficiency with multifaceted aberrant lymphoid immunity,P1,,OMIM:620449,Immunodeficiency 112,NM_003954.5,c.1694C>G,c.1694C>G,"c. C1694G, p. Pro565Ar",...,excluded,na,observed,observed,observed,observed,excluded,na,excluded,excluded


In [4]:
# Build the template encoder from the spreadsheet, the HPO concept recognizer
# and the curator id, then fetch the individuals it produced.
encoder = CaseTemplateEncoder(
    df=df,
    hpo_cr=hpo_cr,
    created_by=created_by,
)
individuals = encoder.get_individuals()

Created encoders for 31 fields


In [5]:
# Transcript against which the allele strings in the spreadsheet are expressed.
MAP3K14_transcript = "NM_003954.5"

# Variant manager over the same DataFrame; it is told which columns hold the
# individual identifier and the two alleles.
vmanager = VariantManager(
    df=df,
    individual_column_name="individual_id",
    gene_symbol="MAP3K14",
    transcript=MAP3K14_transcript,
    allele_1_column_name="allele_1",
    allele_2_column_name="allele_2",
)

[INFO] encoding variant "c.1694C>G"
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_003954.5%3Ac.1694C>G/NM_003954.5?content-type=application%2Fjson
[INFO] encoding variant "c.1033G>A"
https://rest.variantvalidator.org/VariantValidator/variantvalidator/hg38/NM_003954.5%3Ac.1033G>A/NM_003954.5?content-type=application%2Fjson


In [6]:
# Show the variant manager's summary as the cell output; the recorded output
# lists counts and allele strings per status (mapped / unmapped).
vmanager.to_summary()

Unnamed: 0,status,count,alleles
0,mapped,2,"c.1694C>G, c.1033G>A"
1,unmapped,0,


In [7]:
# Hand the individuals to the variant manager so variants can be attached.
# No return value is captured, so the call is used for its effect on
# `individuals` -- presumably in place; TODO confirm against pyphetools docs.
vmanager.add_variants_to_individuals(individuals)

In [8]:
# Set up cohort validation: minimum HPO term count of 1 and a bi-allelic
# genotype requirement.
cvalidator = CohortValidator(
    cohort=individuals,
    ontology=hpo_ontology,
    min_hpo=1,
    allelic_requirement=AllelicRequirement.BI_ALLELIC,
)

# Render the validator's quality-control summary as HTML in the cell output.
qc = QcVisualizer(cohort_validator=cvalidator)
display(HTML(qc.to_summary_html()))

In [9]:
# Replace the cohort with the validator's error-free list.
# NOTE(review): this rebinds `individuals`, so from here on the name no longer
# refers to the list returned by encoder.get_individuals().
individuals = cvalidator.get_error_free_individual_list()
# Render the remaining individuals as an HTML table in the cell output.
table = IndividualTable(individuals)
display(HTML(table.to_html()))

Individual,Disease,Genotype,Phenotypic features
P1 (FEMALE; P9Y),Immunodeficiency 112 (OMIM:620449),NM_003954.5:c.1694C>G (homozygous),P10M: Decreased circulating IgG level (HP:0004315); Decreased circulating IgA level (HP:0002720) P9Y: Decreased circulating total IgM (HP:0002850) B lymphocytopenia (HP:0010976); Reduced natural killer cell count (HP:0040218); Decreased proportion of class-switched memory B cells (HP:0030388); Recurrent viral infections (HP:0004429); Recurrent bacterial infections (HP:0002718); BCGosis (HP:0020087); excluded: Increased B cell count (HP:0005404); excluded: Increased T cell count (HP:0100828); excluded: T lymphocytopenia (HP:0005403); excluded: BCGitis (HP:0020086); excluded: Chronic mucocutaneous candidiasis (HP:0002728); excluded: Chronic diarrhea (HP:0002028)
P2 (FEMALE; ),Immunodeficiency 112 (OMIM:620449),NM_003954.5:c.1694C>G (homozygous),P1Y3M: Decreased circulating IgA level (HP:0002720) P1Y9M: Decreased circulating total IgM (HP:0002850) B lymphocytopenia (HP:0010976); Reduced natural killer cell count (HP:0040218); Decreased proportion of class-switched memory B cells (HP:0030388); Recurrent viral infections (HP:0004429); Recurrent bacterial infections (HP:0002718); Recurrent lower respiratory tract infections (HP:0002783); Chronic mucocutaneous candidiasis (HP:0002728); Chronic diarrhea (HP:0002028); excluded: Increased B cell count (HP:0005404); excluded: Increased T cell count (HP:0100828); excluded: T lymphocytopenia (HP:0005403); excluded: BCGitis (HP:0020086)
proband (FEMALE; P6Y),Immunodeficiency 112 (OMIM:620449),NM_003954.5:c.1033G>A (homozygous),P2Y: BCGitis (HP:0020086) P3Y: BCGosis (HP:0020087) Decreased circulating IgG level (HP:0004315); Decreased circulating total IgM (HP:0002850); Decreased circulating IgA level (HP:0002720); Increased B cell count (HP:0005404); Increased T cell count (HP:0100828); Decreased proportion of gamma-delta T cells (HP:0500271); excluded: B lymphocytopenia (HP:0010976); excluded: Reduced natural killer cell count (HP:0040218); excluded: T lymphocytopenia (HP:0005403); excluded: Recurrent viral infections (HP:0004429); excluded: Recurrent bacterial infections (HP:0002718)


In [10]:
# Export the error-free individuals as GA4GH phenopackets; the recorded output
# reports that they are written to the directory "phenopackets".
encoder.output_individuals_as_phenopackets(individual_list=individuals)

We output 3 GA4GH phenopackets to the directory phenopackets


# Create HPOA file

In [11]:
# Read the phenopackets back from the "phenopackets" directory and flatten the
# ingestor's dictionary into a plain list of its values.
ingestor = PhenopacketIngestor(indir="phenopackets")
ppkt_d = ingestor.get_phenopacket_dictionary()
ppkt_list = [*ppkt_d.values()]

[pyphetools] Ingested 3 GA4GH phenopackets.


In [12]:
# Citation passed to the builder's autosomal_recessive() step.
PMID = "PMID:25406581"  # Willmann et al, 2014

# Configure the HPOA table builder over the ingested phenopackets and build
# the table creator used by the following cells.
builder = HpoaTableBuilder(phenopacket_list=ppkt_list)
hpoa_table_creator = (
    builder
    .autosomal_recessive(PMID)
    .build()
)

We found a total of 17 unique HPO terms
Extracted disease: Immunodeficiency 112 (OMIM:620449)


In [13]:
# Display the HPOA annotations as a DataFrame (one row per phenotype term with
# its frequency, publication, evidence and biocuration columns, per the output).
hpoa_table_creator.get_dataframe()

Unnamed: 0,#diseaseID,diseaseName,phenotypeID,phenotypeName,onsetID,onsetName,frequency,sex,negation,modifier,description,publication,evidence,biocuration
0,OMIM:620449,Immunodeficiency 112,HP:0004315,Decreased circulating IgG level,,,1/1,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
1,OMIM:620449,Immunodeficiency 112,HP:0002720,Decreased circulating IgA level,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
2,OMIM:620449,Immunodeficiency 112,HP:0002850,Decreased circulating total IgM,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
3,OMIM:620449,Immunodeficiency 112,HP:0010976,B lymphocytopenia,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
4,OMIM:620449,Immunodeficiency 112,HP:0040218,Reduced natural killer cell count,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
5,OMIM:620449,Immunodeficiency 112,HP:0030388,Decreased proportion of class-switched memory B cells,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
6,OMIM:620449,Immunodeficiency 112,HP:0004429,Recurrent viral infections,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
7,OMIM:620449,Immunodeficiency 112,HP:0002718,Recurrent bacterial infections,,,2/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
8,OMIM:620449,Immunodeficiency 112,HP:0020087,BCGosis,,,1/1,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199
9,OMIM:620449,Immunodeficiency 112,HP:0005404,Increased B cell count,,,0/2,,,,,PMID:25406581,PCS,ORCID:0000-0002-0736-9199


In [14]:
# Write the HPOA table to disk; the recorded output names the file OMIM-620449.tab.
hpoa_table_creator.write_data_frame()

Wrote HPOA disease file to OMIM-620449.tab
